In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate every file beneath the Kaggle input directory and print its full path.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **GLOBAL INCOME INEQUALITY ANALYSIS**

# **LOAD THE DATA**

In [None]:
import pandas as pd

# Read the global income inequality CSV from the Kaggle input directory into a DataFrame.
DATASET_PATH = '/kaggle/input/global-income-inequality/global_income_inequality.csv'
data = pd.read_csv(DATASET_PATH)

# Preview the first rows to confirm the file loaded as expected.
preview = data.head()
print(preview)

# **EXPLORE THE DATA**

In [None]:
# Report the dataset dimensions as a (rows, columns) tuple.
dataset_shape = data.shape
print(dataset_shape)

In [None]:
# DataFrame.info() writes its summary (column names, non-null counts, dtypes)
# straight to stdout and returns None, so it is called on its own: wrapping it
# in print() would emit an extra "None" line after the summary.
data.info()

In [None]:
# Summary statistics produced by DataFrame.describe() for the dataset.
summary_stats = data.describe()
print(summary_stats)

In [None]:
# Look at the last rows of the dataset as a complement to the head() preview.
last_rows = data.tail()
print(last_rows)

# **DATA CLEANING**

In [None]:
# Count the missing (null) entries in each column.
missing_per_column = data.isnull().sum()
print(missing_per_column)

In [None]:
# Discard every row that contains at least one missing value.
data = data.dropna()

# **DATA VISUALISATION**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Histogram (30 bins) of the 'Gini Index' column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['Gini Index'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Gini Index')
ax.set_xlabel('Gini Index')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Box plot of the 'Gini Index' values grouped by 'Country'.
# The title and axis labels name the columns that are actually plotted
# ('Country' on the x-axis, 'Gini Index' on the y-axis).
plt.figure(figsize=(10, 6))
sns.boxplot(x='Country', y='Gini Index', data=data)
plt.title('Gini Index by Country')
plt.xlabel('Country')
plt.ylabel('Gini Index')
plt.xticks(rotation=45)  # tilt the category labels to reduce overlap
plt.show()

In [None]:
import numpy as np

# Keep only the numeric columns, then compute their pairwise correlations.
numeric_data = data.select_dtypes(include=[np.number])
correlations = numeric_data.corr()

# Draw the annotated correlation matrix on an explicit 12x8 axes.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

# **ANALYZE INEQUALITY**

The Gini coefficient is a measure of inequality within a distribution, commonly used to assess income or wealth inequality within a population. It ranges from 0 to 1, where:

- 0 represents perfect equality (everyone has the same income).
- 1 represents perfect inequality (one person has all the income, while everyone else has none).

In [None]:
import numpy as np

# Function to calculate Gini coefficient
def gini_coefficient(data):
    """Return the Gini coefficient of a one-dimensional collection of values.

    With the values sorted in ascending order and ``c_k`` the running
    (cumulative) sum of the first ``k`` values, the coefficient is

        G = (n + 1 - 2 * sum(c_k) / sum(x)) / n

    which is 0 when all values are equal and (n - 1) / n when a single
    element holds the entire total.

    Parameters
    ----------
    data : sequence, numpy array or pandas Series of numbers
        The values to measure. Must support ``len()``.

    Returns
    -------
    float
        The Gini coefficient. Returns 0.0 for empty input and when the
        values sum to zero (the ratio is undefined there, and nothing is
        unequally distributed).
    """
    n = len(data)
    if n == 0:
        return 0.0

    sorted_values = np.sort(data)
    total = np.sum(sorted_values)
    if total == 0:
        # Guard against dividing by zero when every value is zero.
        return 0.0

    cumulative_values = np.cumsum(sorted_values)
    # sum(cumulative) / total is at most n (all mass on the first element would
    # require negative inequality) and (n + 1) / 2 for equal values, so the
    # sign below keeps the result non-negative for non-negative inputs.
    return (n + 1 - 2 * np.sum(cumulative_values) / total) / n

# Apply gini_coefficient to the entire 'Gini Index' column (no region or
# country filter is applied), then report the result.
# NOTE(review): the input is a column of Gini Index values rather than incomes,
# so this measures how unevenly those index values are spread - confirm that
# this is the intended quantity.
gini_index_values = data['Gini Index']
gini = gini_coefficient(gini_index_values)
print(f"Gini Coefficient: {gini}")

# **Temporal Analysis**

In [None]:
# Average the 'Gini Index' over all rows of each year and plot the yearly trend.
# NOTE: despite its name, `income_over_time` holds mean Gini Index values, not
# incomes; the variable name is kept so any other cell that uses it still works.
income_over_time = data.groupby('Year')['Gini Index'].mean().reset_index()
plt.figure(figsize=(12, 6))
sns.lineplot(x='Year', y='Gini Index', data=income_over_time)
# Title and y-label describe the plotted quantity (mean Gini Index per year).
plt.title('Average Gini Index Over Time')
plt.xlabel('Year')
plt.ylabel('Average Gini Index')
plt.show()

The analysis of global income inequality highlights critical disparities that necessitate immediate action. By understanding the underlying factors contributing to inequality, stakeholders can develop informed strategies to promote economic equity and improve living standards across the globe. The findings of this report underscore the urgency of addressing income inequality as a fundamental aspect of sustainable development.

