In [7]:
import pandas as pd
from pandas import DataFrame
from typing import Set, Any


def remove_others(df: DataFrame, columns: Set[Any]):
    cols_total: Set[Any] = set(df.columns)
    diff: Set[Any] = cols_total - columns
    df.drop(diff, axis=1, inplace=True)

# Importing csv file and making dataframe, renaming some columns
df_HDI = pd.read_csv('HDI data.csv')
remove_others(df_HDI, {"country", "hdr2019", "pop2022"})
df_HDI = df_HDI.rename(columns={"country": "Country"})
df_HDI = df_HDI.rename(columns={"hdr2019": "HDI"})
df_HDI = df_HDI.rename(columns={"pop2022": "Population"})
df_HDI


Unnamed: 0,Country,HDI,Population
0,Norway,0.957,5434319.0
1,Switzerland,0.955,8740472.0
2,Ireland,0.955,5023109.0
3,Hong Kong,0.949,7488865.0
4,Iceland,0.949,372899.0
...,...,...,...
187,Somalia,0.000,17597511.0
188,Monaco,0.000,36469.0
189,San Marino,0.000,33660.0
190,Nauru,0.000,12668.0


In [8]:
# Importing csv file and making dataframe, renaming some columns

df_Gini = pd.read_csv('Gini data.csv')
df_Gini
remove_others(df_Gini, {"country", "giniWB"})
df_Gini = df_Gini.rename(columns={"country": "Country"})
df_Gini = df_Gini.rename(columns={"giniWB": "Gini coefficent - World Bank"})
df_Gini


Unnamed: 0,Country,Gini coefficent - World Bank
0,South Africa,63.0
1,Namibia,59.1
2,Suriname,57.9
3,Zambia,57.1
4,Sao Tome and Principe,56.3
...,...,...
172,Macau,
173,Jersey,
174,Greenland,
175,Faroe Islands,


In [9]:
from functools import reduce

# Merging the two dataframes togther
dfs = [df_HDI,df_Gini]
merged_df = reduce(lambda left, right: pd.merge(left, right, on=['Country'], how='outer'), dfs)


In [10]:
merged_df

Unnamed: 0,Country,HDI,Population,Gini coefficent - World Bank
0,Norway,0.957,5434319.0,27.6
1,Switzerland,0.955,8740472.0,33.1
2,Ireland,0.955,5023109.0,31.4
3,Hong Kong,0.949,7488865.0,
4,Iceland,0.949,372899.0,26.1
...,...,...,...,...
195,Macau,,,
196,Jersey,,,
197,Greenland,,,
198,Faroe Islands,,,


In [11]:
# Droping all rows with NaN
df_clean = merged_df.dropna()


In [18]:
# Dropping values that are equal to 0, as they are not true values

df_clean = df_clean.reset_index(drop=True)
df_clean= df_clean[df_clean['HDI'] != 0.00]
df_clean

Unnamed: 0,Country,HDI,Population,Gini coefficent - World Bank
0,Norway,0.957,5434319.0,27.6
1,Switzerland,0.955,8740472.0,33.1
2,Ireland,0.955,5023109.0,31.4
3,Iceland,0.949,372899.0,26.1
4,Germany,0.947,83369843.0,31.9
...,...,...,...,...
153,Burundi,0.433,12889576.0,38.6
154,South Sudan,0.433,10913164.0,44.1
155,Chad,0.398,17723315.0,43.3
156,Central African Republic,0.397,5579144.0,56.2


In [19]:
import json
# Exporting as a json file
exported_values = list(df_clean.T.to_dict().values())
open('HDI vs Gini data.json' , 'w').write(json.dumps(exported_values))


16206