In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as pt
import datetime as dt
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the income-inequality dataset from the CSV in the working directory,
# then echo the frame so its columns can be inspected.
GII = pd.read_csv(filepath_or_buffer="GII.csv")
GII

Unnamed: 0,Country,Year,Population,Gini Index,Average Income (USD),Top 10% Income Share (%),Bottom 10% Income Share (%),Income Group
0,United States,2000,30072337,0.333105,37106.735536,30.780946,1.096329,High Income
1,United States,2001,15189465,0.508761,16250.105075,54.563475,1.441728,Low Income
2,United States,2002,89001835,0.229093,40069.586399,37.200666,1.374502,High Income
3,United States,2003,297595139,0.230673,1233.469995,40.453246,4.493424,Lower Middle Income
4,United States,2004,23680558,0.544657,22950.248061,45.343660,3.250805,High Income
...,...,...,...,...,...,...,...,...
355,Saudi Arabia,2019,76352940,0.298362,22142.286345,27.316935,4.168030,High Income
356,Saudi Arabia,2020,59947230,0.300902,23503.216793,51.791266,1.842205,Low Income
357,Saudi Arabia,2021,8614363,0.470990,44303.921142,24.308291,1.967456,Low Income
358,Saudi Arabia,2022,210722601,0.210066,56770.870968,42.414258,2.048774,Low Income


In [3]:
# Print each column's dtype and non-null count to check the load for missing values.
GII.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Country                      360 non-null    object 
 1   Year                         360 non-null    int64  
 2   Population                   360 non-null    int64  
 3   Gini Index                   360 non-null    float64
 4   Average Income (USD)         360 non-null    float64
 5   Top 10% Income Share (%)     360 non-null    float64
 6   Bottom 10% Income Share (%)  360 non-null    float64
 7   Income Group                 360 non-null    object 
dtypes: float64(4), int64(2), object(2)
memory usage: 22.6+ KB


In [4]:
# Shorten the three verbose metric headers so later cells can refer to them
# without the unit suffixes. The frame is re-bound rather than mutated with
# `inplace=True`, which keeps the step chainable; `rename` ignores labels that
# are not present, so re-running the cell after the rename is harmless.
GII = GII.rename(
    columns={
        "Average Income (USD)": "Average Income",
        "Top 10% Income Share (%)": "Top10 IncomeShare%",
        "Bottom 10% Income Share (%)": "Bottom10 IncomeShare%",
    }
)
GII

Unnamed: 0,Country,Year,Population,Gini Index,Average Income,Top10 IncomeShare%,Bottom10 IncomeShare%,Income Group
0,United States,2000,30072337,0.333105,37106.735536,30.780946,1.096329,High Income
1,United States,2001,15189465,0.508761,16250.105075,54.563475,1.441728,Low Income
2,United States,2002,89001835,0.229093,40069.586399,37.200666,1.374502,High Income
3,United States,2003,297595139,0.230673,1233.469995,40.453246,4.493424,Lower Middle Income
4,United States,2004,23680558,0.544657,22950.248061,45.343660,3.250805,High Income
...,...,...,...,...,...,...,...,...
355,Saudi Arabia,2019,76352940,0.298362,22142.286345,27.316935,4.168030,High Income
356,Saudi Arabia,2020,59947230,0.300902,23503.216793,51.791266,1.842205,Low Income
357,Saudi Arabia,2021,8614363,0.470990,44303.921142,24.308291,1.967456,Low Income
358,Saudi Arabia,2022,210722601,0.210066,56770.870968,42.414258,2.048774,Low Income


In [5]:
# Metric columns that are reported to two decimal places.
roundoff_columns = [
    "Gini Index",
    "Top10 IncomeShare%",
    "Bottom10 IncomeShare%",
    "Average Income",
]
# DataFrame.round rounds every selected column, matching round(column, 2) per column.
GII[roundoff_columns] = GII[roundoff_columns].round(2)

In [6]:
# Distinct country names present in the dataset, echoed for inspection.
countries = GII.loc[:, "Country"].unique()
countries

array(['United States', 'India', 'Brazil', 'Germany', 'Nigeria', 'China',
       'United Kingdom', 'Russia', 'Japan', 'South Africa', 'Canada',
       'Mexico', 'France', 'Australia', 'Saudi Arabia'], dtype=object)

In [7]:
# Countries this notebook labels 'Developed'; every other country is labelled 'Developing'.
developed_countries = [
    'United States', 'Germany', 'United Kingdom', 'Japan',
    'Canada', 'France', 'Australia', 'Saudi Arabia',
]


def _economic_group(country):
    """Return 'Developed' when the country is in developed_countries, else 'Developing'."""
    if country in developed_countries:
        return 'Developed'
    return 'Developing'


GII['Economic Group'] = GII['Country'].map(_economic_group)

In [8]:
# Country names that receive the 'Member' label.
oecd_countries = [
    'Australia', 'Austria', 'Belgium', 'Canada', 'Chile', 'Colombia',
    'Costa Rica', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France',
    'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Israel',
    'Italy', 'Japan', 'Korea', 'Latvia', 'Lithuania', 'Luxembourg',
    'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal',
    'Slovak Republic', 'Slovenia', 'Spain', 'Sweden', 'Switzerland',
    'Türkiye', 'United Kingdom', 'United States',
]

# Country names that receive the 'Key Partner' label.
oecd_key_partners = ['Brazil', 'China', 'India', 'Indonesia', 'South Africa']


def _oecd_status(country):
    """Return 'Member', 'Key Partner' or 'Non Member' for a country name."""
    # Membership is checked first, so it wins if a name were ever in both lists.
    if country in oecd_countries:
        return 'Member'
    if country in oecd_key_partners:
        return 'Key Partner'
    return 'Non Member'


GII['OECD Status'] = GII['Country'].map(_oecd_status)

In [9]:
# Echo the frame to check the derived 'Economic Group' and 'OECD Status' columns.
GII

Unnamed: 0,Country,Year,Population,Gini Index,Average Income,Top10 IncomeShare%,Bottom10 IncomeShare%,Income Group,Economic Group,OECD Status
0,United States,2000,30072337,0.33,37106.74,30.78,1.10,High Income,Developed,Member
1,United States,2001,15189465,0.51,16250.11,54.56,1.44,Low Income,Developed,Member
2,United States,2002,89001835,0.23,40069.59,37.20,1.37,High Income,Developed,Member
3,United States,2003,297595139,0.23,1233.47,40.45,4.49,Lower Middle Income,Developed,Member
4,United States,2004,23680558,0.54,22950.25,45.34,3.25,High Income,Developed,Member
...,...,...,...,...,...,...,...,...,...,...
355,Saudi Arabia,2019,76352940,0.30,22142.29,27.32,4.17,High Income,Developed,Non Member
356,Saudi Arabia,2020,59947230,0.30,23503.22,51.79,1.84,Low Income,Developed,Non Member
357,Saudi Arabia,2021,8614363,0.47,44303.92,24.31,1.97,Low Income,Developed,Non Member
358,Saudi Arabia,2022,210722601,0.21,56770.87,42.41,2.05,Low Income,Developed,Non Member


In [10]:
# Country names that receive the 'G20 Member' label.
g20_countries = ['Argentina', 'Australia', 'Brazil', 'Canada', 'China', 'France', 'Germany',
                 'India', 'Indonesia', 'Italy', 'Japan', 'Republic of Korea', 'Mexico', 'Russia',
                 'Saudi Arabia', 'South Africa', 'Türkiye', 'United Kingdom', 'United States']

# List of EU member countries
eu_countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark',
                'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia',
                'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia',
                'Slovenia', 'Spain', 'Sweden']

# Other lookup tables in this notebook spell some countries differently
# ('Korea', 'South Korea', 'Turkey', 'Czechia', 'Slovak Republic'). Map those
# spellings onto the ones used by the two lists above so the same country is
# classified the same way whichever spelling the data carries.
_MEMBERSHIP_NAME_ALIASES = {
    'Korea': 'Republic of Korea',
    'South Korea': 'Republic of Korea',
    'Turkey': 'Türkiye',
    'Czechia': 'Czech Republic',
    'Slovak Republic': 'Slovakia',
}


def classify_country(country):
    """Classify a country name by G20 / EU membership.

    Parameters
    ----------
    country : object
        Country name. Alternate spellings listed in
        ``_MEMBERSHIP_NAME_ALIASES`` are normalised first; non-string values
        (e.g. missing data) are not normalised and match neither list.

    Returns
    -------
    str
        'G20 Member' if the name is in ``g20_countries``; otherwise
        'EU Member' if it is in ``eu_countries``; otherwise 'Non G20 Member'.
        G20 membership is checked first, so a country in both lists is
        reported as 'G20 Member'.
    """
    # Only strings are looked up in the alias table; this also keeps
    # unhashable values from raising inside dict.get.
    if isinstance(country, str):
        country = _MEMBERSHIP_NAME_ALIASES.get(country, country)
    if country in g20_countries:
        return 'G20 Member'
    if country in eu_countries:
        return 'EU Member'
    return 'Non G20 Member'

# Label every row with the G20 / EU classification of its country.
GII['G20/EU Status'] = GII['Country'].map(classify_country)

In [11]:
# Region label -> country names that carry it.
# India is listed under 'South Asia' (it was previously grouped with the
# Southeast Asian countries). Spellings used by the other lookup tables in this
# notebook ('Korea', 'Republic of Korea', 'Türkiye', 'Czechia',
# 'Slovak Republic') are accepted alongside the original ones so they no longer
# fall through to 'Other'.
_REGION_MEMBERS = {
    'North America': ('United States', 'Canada', 'Mexico'),
    'Latin America and the Caribbean': ('Brazil', 'Argentina', 'Chile', 'Colombia', 'Peru'),
    'Western Europe': ('United Kingdom', 'France', 'Germany', 'Netherlands', 'Belgium',
                       'Sweden', 'Norway', 'Finland', 'Denmark', 'Austria', 'Switzerland',
                       'Ireland', 'Spain', 'Portugal', 'Italy'),
    'Eastern Europe and Central Asia': ('Russia', 'Ukraine', 'Poland', 'Hungary',
                                        'Czech Republic', 'Czechia', 'Slovakia', 'Slovak Republic',
                                        'Lithuania', 'Latvia', 'Estonia', 'Kazakhstan', 'Uzbekistan'),
    'East Asia': ('China', 'Japan', 'South Korea', 'Korea', 'Republic of Korea',
                  'Mongolia', 'Taiwan'),
    'South Asia': ('India',),
    'Southeast Asia': ('Indonesia', 'Thailand', 'Malaysia', 'Philippines', 'Vietnam'),
    'Middle East and North Africa': ('Saudi Arabia', 'Egypt', 'Iran', 'Turkey', 'Türkiye',
                                     'Iraq', 'United Arab Emirates'),
    'Sub-Saharan Africa': ('Nigeria', 'South Africa', 'Kenya', 'Ghana', 'Uganda', 'Tanzania'),
    'Oceania': ('Australia', 'New Zealand', 'Papua New Guinea'),
}

# Inverted index (country name -> region label) built once for constant-time lookups.
_REGION_BY_COUNTRY = {
    name: region
    for region, members in _REGION_MEMBERS.items()
    for name in members
}


def classify_region(country):
    """Return the regional group label for a country name.

    Parameters
    ----------
    country : object
        Country name to classify.

    Returns
    -------
    str
        The region label from ``_REGION_MEMBERS`` that lists the name, or
        'Other' when the name is not listed or the value is not a string
        (e.g. missing data).
    """
    # Non-string values can never match a country name; answering 'Other'
    # directly also avoids hashing errors for unhashable inputs.
    if not isinstance(country, str):
        return 'Other'
    return _REGION_BY_COUNTRY.get(country, 'Other')

# Label every row with the regional group of its country.
GII['Regional Group'] = GII['Country'].map(classify_region)

In [12]:
# Echo the frame to check the derived 'G20/EU Status' and 'Regional Group' columns.
GII

Unnamed: 0,Country,Year,Population,Gini Index,Average Income,Top10 IncomeShare%,Bottom10 IncomeShare%,Income Group,Economic Group,OECD Status,G20/EU Status,Regional Group
0,United States,2000,30072337,0.33,37106.74,30.78,1.10,High Income,Developed,Member,G20 Member,North America
1,United States,2001,15189465,0.51,16250.11,54.56,1.44,Low Income,Developed,Member,G20 Member,North America
2,United States,2002,89001835,0.23,40069.59,37.20,1.37,High Income,Developed,Member,G20 Member,North America
3,United States,2003,297595139,0.23,1233.47,40.45,4.49,Lower Middle Income,Developed,Member,G20 Member,North America
4,United States,2004,23680558,0.54,22950.25,45.34,3.25,High Income,Developed,Member,G20 Member,North America
...,...,...,...,...,...,...,...,...,...,...,...,...
355,Saudi Arabia,2019,76352940,0.30,22142.29,27.32,4.17,High Income,Developed,Non Member,G20 Member,Middle East and North Africa
356,Saudi Arabia,2020,59947230,0.30,23503.22,51.79,1.84,Low Income,Developed,Non Member,G20 Member,Middle East and North Africa
357,Saudi Arabia,2021,8614363,0.47,44303.92,24.31,1.97,Low Income,Developed,Non Member,G20 Member,Middle East and North Africa
358,Saudi Arabia,2022,210722601,0.21,56770.87,42.41,2.05,Low Income,Developed,Non Member,G20 Member,Middle East and North Africa


In [13]:
# Persist the enriched frame; index=False keeps the row index out of the file.
GII.to_csv(path_or_buf='GII_data.csv', index=False)

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
GII.describe()

Unnamed: 0,Year,Population,Gini Index,Average Income,Top10 IncomeShare%,Bottom10 IncomeShare%
count,360.0,360.0,360.0,360.0,360.0,360.0
mean,2011.5,172225100.0,0.425611,35433.844639,40.200389,2.992611
std,6.931821,101695400.0,0.128098,20425.341258,11.495397,1.1475
min,2000.0,5493471.0,0.2,1031.46,20.05,1.02
25%,2005.75,78732800.0,0.32,18655.895,30.4875,1.995
50%,2011.5,175491300.0,0.43,34470.82,40.39,2.975
75%,2017.25,254122500.0,0.54,54259.9925,49.9175,4.025
max,2023.0,349176800.0,0.65,69732.71,59.94,4.99
