<a href="https://colab.research.google.com/github/clemvnt/training-datamining-mds/blob/issue7/20200424_07_BERLIN_final2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Objective**

The Gini index measures how equally income is distributed within each country. The closer the index is to 0, the more equal the country is. The world average Gini index is 38.

**Data**

GINI INDEX

**Source**

World Bank, Development Research Group.

**Pitch**

https://drive.google.com/file/d/10PXCX0Czck8QJwhinVEoKV3MZGvlAVDC/view

**Step 1 : Import dependencies**


In [0]:
import pandas as pd
from pandas_datareader import wb
import plotly.graph_objects as go
import plotly.express as px 

**Step 2 : Get the association between the country and the ISO code**

In [0]:
# Country lookup table: keep only the display name and the ISO-3 code, and
# expose the name under `country` so it lines up with the indicator table's
# `country` key used by the merge in Step 4.
countries = (
    wb.get_countries()[['name', 'iso3c']]
    .rename(columns={'name': 'country'})
)
countries

Unnamed: 0,country,iso3c
0,Aruba,ABW
1,Afghanistan,AFG
2,Africa,AFR
3,Angola,AGO
4,Albania,ALB
...,...,...
299,Sub-Saharan Africa excluding South Africa and ...,XZN
300,"Yemen, Rep.",YEM
301,South Africa,ZAF
302,Zambia,ZMB


**Step 3 : Get the Gini index indicator (`SI.POV.GINI`) per country and year**

In [0]:
# Download the Gini indicator for every country over 1967-2018 and give its
# single value column a readable name. The result is indexed by
# (country, year); years without a published value are NaN.
indicators = (
    wb.download(indicator=['SI.POV.GINI'], country='all', start=1967, end=2018)
    .rename(columns={'SI.POV.GINI': 'GINI_INDEX'})
)
indicators

Unnamed: 0_level_0,Unnamed: 1_level_0,GINI_INDEX
country,year,Unnamed: 2_level_1
Arab World,2018,
Arab World,2017,
Arab World,2016,
Arab World,2015,
Arab World,2014,
...,...,...
Zimbabwe,1971,
Zimbabwe,1970,
Zimbabwe,1969,
Zimbabwe,1968,


**Step 4 : Merge previous tables**

In [0]:
# Attach the ISO-3 code to every (country, year) observation by joining on the
# country name (pandas' default inner join), then index the result by
# (country, iso3c, year).
master_table = (
    indicators
    .reset_index()
    .merge(countries, on='country')
    .set_index(['country', 'iso3c', 'year'])
)
master_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GINI_INDEX
country,iso3c,year,Unnamed: 3_level_1
Arab World,ARB,2018,
Arab World,ARB,2017,
Arab World,ARB,2016,
Arab World,ARB,2015,
Arab World,ARB,2014,
...,...,...,...
Zimbabwe,ZWE,1971,
Zimbabwe,ZWE,1970,
Zimbabwe,ZWE,1969,
Zimbabwe,ZWE,1968,


**Step 5 : Pivot previous table and fill in undefined values with values from previous years**

In [0]:
# One row per (country, iso3c), one column per year. Each gap is then filled
# with that country's most recent earlier value (forward fill along the year
# axis); years before a country's first observation remain NaN.
pivoted_table = (
    master_table
    .pivot_table(index=['country', 'iso3c'], columns='year', values='GINI_INDEX')
    .ffill(axis=1)
)
pivoted_table

Unnamed: 0_level_0,year,1967,1969,1971,1974,1975,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
country,iso3c,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1
Albania,ALB,,,,,,,,,,,,,,,,,,,,,,,,27.0,27.0,27.0,27.0,27.0,27.0,31.7,31.7,31.7,30.6,30.6,30.6,30.0,30.0,30.0,30.0,29.0,29.0,34.6,32.9,33.7,33.2,33.2
Algeria,DZA,,,,,,,,,,,,,,,,40.2,40.2,40.2,40.2,40.2,40.2,40.2,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,35.3,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6
Angola,AGO,,,,,,,,,,,,,,,,,,,,,,,,,,,,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7,51.3
Argentina,ARG,,,,,,,,40.8,40.8,40.8,40.8,40.8,40.8,42.8,45.3,45.3,45.3,45.3,46.8,45.5,44.9,45.9,48.9,49.5,49.1,50.7,49.8,51.1,53.3,53.8,51.2,48.6,48.0,46.7,46.6,45.3,44.1,44.5,42.7,41.4,41.0,41.7,41.7,42.0,41.2,41.4
Armenia,ARM,,,,,,,,,,,,,,,,,,,,,,,,,,,36.2,36.2,35.4,34.8,33.0,37.5,36.0,29.7,31.2,29.2,28.0,30.0,29.4,29.6,30.6,31.5,32.4,32.5,33.6,34.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vietnam,VNM,,,,,,,,,,,,,,,,,,,,35.7,35.7,35.7,35.7,35.7,35.7,35.4,35.4,35.4,35.4,37.0,37.0,36.8,36.8,35.8,35.8,35.6,35.6,39.3,39.3,35.6,35.6,34.8,34.8,35.3,35.3,35.7
West Bank and Gaza,PSE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,34.0,34.7,34.0,35.6,35.6,34.5,35.3,34.4,34.4,34.4,34.4,34.4,33.7,33.7,33.7
"Yemen, Rep.",YEM,,,,,,,,,,,,,,,,,,,,,,,,,,35.0,35.0,35.0,35.0,35.0,35.0,35.0,34.7,34.7,34.7,34.7,34.7,34.7,34.7,34.7,34.7,36.7,36.7,36.7,36.7,36.7
Zambia,ZMB,,,,,,,,,,,,,,,,,,,60.5,60.5,52.6,52.6,52.6,48.3,48.3,49.1,49.1,49.1,49.1,42.1,42.1,54.3,54.3,54.6,54.6,54.6,54.6,55.6,55.6,55.6,55.6,55.6,57.1,57.1,57.1,57.1


**Step 5 : Show a map of the Gini index per country over the years (from 1967 to 2018)**

In [0]:
# Build an interactive choropleth of the Gini index: one trace per year, with a
# slider that switches which single trace is visible.

# Rebuilt here so this cell can be run on its own once `master_table` exists.
pivoted_table = pd.pivot_table(master_table, index=['country', 'iso3c'], columns='year', values='GINI_INDEX')
pivoted_table = pivoted_table.ffill(axis=1)

years = list(pivoted_table.columns)
assert years, 'pivoted_table has no year columns to plot'
# The most recent year is the trace shown initially AND the slider's active
# step; deriving both from one index keeps them in sync whatever the last
# year (or its dtype) happens to be.
last_idx = len(years) - 1

# Use a dedicated name for the hover labels: rebinding `countries` (the
# DataFrame from Step 2) to a list would break re-running the Step 4 merge.
country_names = list(pivoted_table.index.get_level_values(0))
locations = list(pivoted_table.index.get_level_values(1))

data = []
steps = []
for idx, year in enumerate(years):
  data.append(dict(
    type='choropleth',
    name='',
    locations=locations,
    z=pivoted_table[year],
    hovertext=country_names,
    colorscale=px.colors.sequential.Reds,
    visible=(idx == last_idx)
  ))

  # Selecting this step hides every trace except the one for `year`.
  steps.append(dict(
    method='restyle',
    args=['visible', [j == idx for j in range(len(years))]],
    label=str(year)
  ))

layout = go.Layout(
  title=dict(
    # Year range is taken from the data so the title cannot drift from it.
    text='Evolution of the gini index from {} to {}'.format(years[0], years[-1]),
    x=0.5,
    font=dict(
      size=21,
    )
  ),
  sliders=[dict(steps=steps, active=last_idx)],
  annotations=[dict(
    text='Updated in 2018 from The World Bank',
    showarrow=False,
    x=1,
    y=-0.05
  )],
  autosize=True,
  height=800
)

fig = go.Figure(data, layout)
fig

**Step 6 : Export HTML**

In [0]:
# Export the figure built in the previous cell to "file.html". The path is
# relative, so the file is written to the kernel's current working directory.
fig.write_html("file.html")