# **Inequality and Economic Growth**

## **Import Libraries**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.cluster import KMeans

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers

## **Import Dataset**

In [6]:
# Load the raw inequality table. Malformed rows are skipped with a warning
# (`on_bad_lines="warn"`); this replaces `error_bad_lines=False`, a keyword that
# was deprecated in pandas 1.3 and removed in pandas 2.0.
countries = pd.read_csv("inequality.csv", encoding="latin-1", on_bad_lines="warn")

### **Data Engineering**

In [7]:
# Preview the first five rows to check how the columns were parsed.
countries.head(n=5)

Unnamed: 0.1,Unnamed: 0,country,year,gini_reported,q1,q2,q3,q4,q5,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,bottom5,top5,resource,resource_detailed,scale,scale_detailed,sharing_unit,reference_unit,areacovr,areacovr_detailed,popcovr,popcovr_detailed,region_un,region_un_sub,region_wb,eu,oecd,incomegroup,mean,median,reference_period,exchangerate,mean_usd,median_usd
0,1,Afghanistan,2008,29.0,9.0,13.0,17.0,22.0,39.0,,,,,,,,,,,,,Consumption,Consumption,Per capita,Per capita,Household,Person,All,All,All,All,Asia,Southern Asia,South Asia,Non-EU,Non-OECD,Low income,36588.0,,Month,50.249615,,
1,2,Albania,2012,28.96,8.85,13.17,17.34,22.81,37.82,3.66,5.19,6.14,7.03,8.08,9.26,10.52,12.29,14.89,22.93,,,Consumption,Consumption,Per capita,Per capita,Household,Person,All,All,All,All,Europe,Southern Europe,Europe and Central Asia,Non-EU,Non-OECD,Upper middle income,2703.36,2340.72,Year,108.184645,2703.0,2341.0
2,3,Algeria,2012,27.62,9.36,13.67,17.47,22.29,37.22,4.05,5.31,6.36,7.31,8.24,9.23,10.39,11.9,14.36,22.86,,,Consumption,Consumption,Per capita,Per capita,Household,Person,All,All,All,All,Africa,Northern Africa,Middle East and North Africa,Non-EU,Non-OECD,Upper middle income,2972.76,2592.0,Year,77.535967,2973.0,2592.0
3,4,Angola,2009,55.0,3.0,7.0,12.0,19.0,59.0,,,,,,,,,,,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Africa,Middle Africa,Sub-Saharan Africa,Non-EU,Non-OECD,Lower middle income,105204.0,,Month,79.328167,,
4,5,Argentina,2017,41.3,5.1,9.9,15.4,23.0,46.7,1.8,3.3,4.4,5.5,6.9,8.5,10.3,12.7,17.0,29.7,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,Urban,Urban,All,All,Americas,South America,Latin America and the Caribbean,Non-EU,Non-OECD,High income,,,,16.562707,,


In [8]:
# Distinct income-group labels present in the table.
countries.loc[:, 'incomegroup'].unique()

array(['Low income', 'Upper middle income', 'Lower middle income',
       'High income'], dtype=object)

In [9]:
# Ordinal encoding of the income-group labels (1 = lowest, 4 = highest).
label = {
    'Low income': 1,
    'Lower middle income': 2,
    'Upper middle income': 3,
    'High income': 4,
}

# Values that are not keys of `label` are passed through unchanged, so running
# this cell a second time keeps the already-encoded integers instead of turning
# the whole column into NaN (which a plain `.map(label)` would do on re-run).
countries['incomegroup'] = countries['incomegroup'].map(
    lambda group: label.get(group, group)
)

  

In [10]:
# Distinct values of the EU-membership flag.
countries.loc[:, 'eu'].unique()

array(['Non-EU', 'EU'], dtype=object)

In [11]:
# Keep only the rows whose `eu` flag equals 'EU'.
europe = countries[countries['eu'].eq('EU')]

In [12]:
# Remove the `eu` column from the EU subset.
europe = europe.drop('eu', axis='columns')

In [13]:
# Distinct values of the `region_wb` column.
countries.loc[:, 'region_wb'].unique()

array(['South Asia', 'Europe and Central Asia',
       'Middle East and North Africa', 'Sub-Saharan Africa',
       'Latin America and the Caribbean', 'East Asia and the Pacific',
       'North America'], dtype=object)

In [14]:
# Columns removed from each per-region frame.
colums_1 = [
    'region_wb',
    'eu',
]

In [15]:
def _region_subset(frame, region, exclude_eu=False):
    """Return the rows of ``frame`` whose ``region_wb`` equals ``region``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding at least the ``region_wb`` and ``eu`` columns.
    region : str
        Value of ``region_wb`` to keep.
    exclude_eu : bool, default False
        When True, rows whose ``eu`` value is ``'EU'`` are left out as well.

    Returns
    -------
    pandas.DataFrame
        The matching rows, in their original order, without the columns
        listed in ``colums_1``.
    """
    keep = frame['region_wb'] == region
    if exclude_eu:
        keep = keep & (frame['eu'] != 'EU')
    return frame.loc[keep].drop(columns=colums_1)


# One frame per region. Rows keep the order they have in `countries`
# (nothing is sorted or grouped here).
latin_america = _region_subset(countries, 'Latin America and the Caribbean')
south_asia = _region_subset(countries, 'South Asia')
middle_east_and_africa = _region_subset(countries, 'Middle East and North Africa')
subsaharian = _region_subset(countries, 'Sub-Saharan Africa')
asia_pacific = _region_subset(countries, 'East Asia and the Pacific')
north_america = _region_subset(countries, 'North America')

# Rows flagged 'EU' are excluded from this regional frame.
europe_central_asia = _region_subset(
    countries, 'Europe and Central Asia', exclude_eu=True
)


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f35aed7ed30>

In [16]:
# NOTE(review): this only builds a GroupBy object and displays its repr; nothing
# is aggregated and `latin_america` itself is not modified.
latin_america.sort_values(by='year').groupby(by='country')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f35aed64b00>

In [17]:
# Show a bounded preview instead of dumping the whole frame into the output.
latin_america.head(10)

Unnamed: 0.1,Unnamed: 0,country,year,gini_reported,q1,q2,q3,q4,q5,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,bottom5,top5,resource,resource_detailed,scale,scale_detailed,sharing_unit,reference_unit,areacovr,areacovr_detailed,popcovr,popcovr_detailed,region_un,region_un_sub,oecd,incomegroup,mean,median,reference_period,exchangerate,mean_usd,median_usd
4,5,Argentina,2017,41.3,5.1,9.9,15.4,23.0,46.7,1.8,3.3,4.4,5.5,6.9,8.5,10.3,12.7,17.0,29.7,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,Urban,Urban,All,All,Americas,South America,Non-OECD,4,,,,16.562707,,
8,9,Barbados,2010,47.0,4.3,8.7,12.6,19.4,54.9,1.4,2.9,3.9,4.8,5.7,6.9,8.4,11.0,15.0,39.9,,,Consumption,Consumption,Per capita,Per capita,Household,Person,All,All,All,All,Americas,Caribbean,Non-OECD,4,,,,2.0,,
10,11,Belize,1999,53.26,3.21,7.68,12.01,19.4,57.69,0.93,2.28,3.4,4.28,5.34,6.67,8.48,10.92,15.26,42.43,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Americas,Central America,Non-OECD,3,3016.92,1791.24,Year,2.0,3017.0,1791.0
13,14,Bolivia,2017,44.0,4.1,9.5,14.9,22.9,48.5,1.2,2.9,4.1,5.4,6.7,8.2,10.2,12.7,16.8,31.7,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Americas,South America,Non-OECD,2,,,,6.91,,
16,17,Brazil,2016,53.0,3.3,7.4,12.2,19.4,57.6,1.1,2.2,3.2,4.2,5.4,6.8,8.4,11.0,16.1,41.5,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,With rural north,All,All,Americas,South America,Non-OECD,3,,,,3.491313,,
24,25,Chile,2015,48.5,4.72,8.72,12.75,19.23,54.56,1.68,3.04,3.91,4.81,5.79,6.96,8.47,10.76,15.21,39.35,0.58,27.31,Income (net),"Income, net",Per capita,Per capita,Household,Person,All,All,All,All,Americas,South America,OECD,4,3048424.0,1933352.0,Year,654.124084,,
26,27,Colombia,2016,50.7,3.9,8.0,12.6,19.9,55.5,1.3,2.6,3.5,4.5,5.6,7.0,8.7,11.2,15.6,39.9,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Americas,South America,Non-OECD,3,,,,3054.121673,,
30,31,Costa Rica,2017,48.3,4.4,8.4,12.8,20.4,53.9,1.6,2.8,3.7,4.7,5.7,7.1,8.8,11.6,17.0,36.9,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Americas,Central America,Non-OECD,3,,,,567.51309,,
33,34,Dominican Republic,2016,45.3,5.0,9.2,13.9,20.9,51.1,1.9,3.1,4.1,5.1,6.2,7.7,9.3,11.6,16.1,35.0,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Americas,Caribbean,Non-OECD,3,,,,46.07798,,
34,35,Ecuador,2016,44.9,4.7,9.4,14.1,21.3,50.4,1.6,3.1,4.2,5.2,6.4,7.7,9.4,11.9,16.2,34.2,,,Income (net/gross),"Income, net/gross",Per capita,Per capita,Household,Person,All,All,All,All,Americas,South America,Non-OECD,3,,,,1.0,,


In [18]:
# Save the Latin America subset (the row index is written as the first column).
latin_america.to_csv("latinamerica.csv")

In [19]:
# Save the non-EU Europe and Central Asia subset.
europe_central_asia.to_csv("eurasia.csv")

In [20]:
# Save the EU subset.
europe.to_csv("europe.csv")

In [21]:
# Save the North America subset.
north_america.to_csv("northamerica.csv")

In [22]:
# Save the East Asia and the Pacific subset.
asia_pacific.to_csv("asia_pacific.csv")

In [23]:
# Save the Middle East and North Africa subset.
middle_east_and_africa.to_csv("arab.csv")

In [24]:
# Save the Sub-Saharan Africa subset.
subsaharian.to_csv("subsaharian.csv")