# Happiness Report & Ease of doing business index

## My idea for this is to combine the data from the Happiness Report with data from the Ease of doing business index

This combination may seem a bit odd, but maybe some interesting correlations will be found.

In [2]:
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objs as go
# Enable plotly's offline rendering inside the notebook; connected=True loads
# plotly.js from the CDN instead of embedding the library in the notebook file.
plotly.offline.init_notebook_mode(connected=True)

In [87]:
# Load the 2017 World Happiness Report and drop the score and its whisker
# bounds, leaving the rank plus the individual contributing factors.
# The builtin `object` replaces the `np.object` alias, which NumPy deprecated
# in 1.20 and removed in 1.24 (where it raises AttributeError).
happiness = (
    pd.read_csv('data/2017.csv', dtype={'Country': object})
    .drop(['Happiness.Score', 'Whisker.high', 'Whisker.low'], axis=1)
)
happiness.head()

Unnamed: 0,Country,Happiness.Rank,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [96]:
# Load the "Ease of doing business" data and reduce it to one 2017 value per
# country, expressed as a single method chain.
business = (
    pd.read_csv('data/ease-of-doing-business.csv')
    # Keep only the overall index rows and the three columns that are needed.
    .loc[
        lambda df: df['Indicator Name'] == 'Ease of doing business index (1=easiest to 185=most difficult)',
        ['Country Name', 'Country Code', '2017'],
    ]
    # Give the 2017 column a descriptive name (index labels are cast to str).
    .rename(index=str, columns={"2017": "Ease of doing business"})
    # Discard countries without a 2017 value.
    .dropna()
)
# Second name for the same frame, used by the plotting cells.
business_plot = business
business.head()

Unnamed: 0,Country Name,Country Code,Ease of doing business
13,Afghanistan,AFG,183.0
72,Albania,ALB,65.0
131,Algeria,DZA,166.0
308,Angola,AGO,175.0
367,Antigua and Barbuda,ATG,107.0


## In order to combine both dataframes, I first reshape each of them with the pivot_table function and then concatenate the results

In [67]:
# Reshape both frames so that countries become the columns and the numeric
# indicators become the rows (pivot_table aggregates with the mean by default).
happiness_pivot = happiness.pivot_table(columns=['Country'])

# Select the grouping key and the numeric value before pivoting: the text
# 'Country Code' column cannot be averaged. Older pandas silently dropped such
# columns, but pandas 2.x raises a TypeError instead.
business_pivot = (
    business[['Country Name', 'Ease of doing business']]
    .pivot_table(columns=['Country Name'])
)

In [94]:
# Stack the two pivot tables (indicators as rows), cast to object and transpose
# so that every row is a country and every column an indicator.
data_plot = (
    pd.concat([happiness_pivot, business_pivot])
    .astype(object)
    .T
)
# Keep only the countries that appear in the happiness report, i.e. the rows
# that have a Happiness.Rank value.
data = data_plot.dropna(subset=['Happiness.Rank'])
data.head()

Unnamed: 0,Dystopia.Residual,Economy..GDP.per.Capita.,Family,Freedom,Generosity,Happiness.Rank,Health..Life.Expectancy.,Trust..Government.Corruption.,Ease of doing business
Afghanistan,2.1508,0.401477,0.581543,0.10618,0.311871,141,0.180747,0.0611578,183
Albania,1.49044,0.996193,0.803685,0.381499,0.201313,109,0.73116,0.0398642,65
Algeria,2.5676,1.09186,1.14622,0.233336,0.0694366,53,0.617585,0.146096,166
Angola,1.61448,0.858428,1.10441,0.0,0.0979265,140,0.0498687,0.0697203,175
Argentina,2.61401,1.1853,1.44045,0.494519,0.109457,24,0.695137,0.0597399,117


In [70]:
# Number of rows (countries) remaining in the combined frame.
len(data)

155

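After preparing the data, there are 155 rows left, one per country in the Happiness Report. Only rows without a Happiness Rank are dropped, so any country that could not be matched by name in the Ease of doing business index is still included, with a missing value in that column.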

## Let's look for correlations using the pandas .corr function

In [71]:
# Pearson correlation of every indicator with the happiness rank; the frame is
# cast back to float first because it was stored with object dtype.
Happiness_Correlations = (
    data.astype(float)
    .corr(method='pearson', min_periods=10)
    .loc[:, ['Happiness.Rank']]
)
# Display the strength of each correlation (sign removed), strongest first.
Happiness_Correlations.abs().sort_values(by='Happiness.Rank', ascending=False)

Unnamed: 0,Happiness.Rank
Happiness.Rank,1.0
Economy..GDP.per.Capita.,0.813244
Health..Life.Expectancy.,0.780716
Family,0.736753
Ease of doing business,0.675495
Freedom,0.551608
Dystopia.Residual,0.484506
Trust..Government.Corruption.,0.405842
Generosity,0.13262


## The absolute correlation between Happiness Rank and the Ease of doing business index is just 0.675. Let's plot the relationship anyway

In [72]:
# Scatter plot of happiness rank against ease-of-doing-business rank,
# one marker per country.
happiness_vs_business_trace = go.Scatter(
    x=data['Happiness.Rank'],
    y=data['Ease of doing business'],
    mode="markers",
    hovertext=data.index,  # the index holds the country names
)
happiness_vs_business_layout = go.Layout(
    title="World",
    xaxis={'title': 'Happiness Rank'},
    yaxis={'title': 'Ease of doing business Rank'},
)
plotly.offline.iplot({
    "data": [happiness_vs_business_trace],
    "layout": happiness_vs_business_layout,
})

## When plotted, we can see a relationship between Happiness Rank and the Ease of doing business index, though not a strong one


## Let's see if there is a relationship between the Ease of doing business index and the GDP per capita

In [74]:
# Scatter plot of GDP per capita against the ease-of-doing-business value,
# one marker per country.
gdp_vs_business_trace = go.Scatter(
    x=data['Economy..GDP.per.Capita.'],
    y=data['Ease of doing business'],
    mode="markers",
    hovertext=data.index,  # the index holds the country names
)
gdp_vs_business_layout = go.Layout(
    title="World",
    xaxis={'title': 'GDP per capita (normalized)'},
    yaxis={'title': 'Ease of doing business'},
)
plotly.offline.iplot({
    "data": [gdp_vs_business_trace],
    "layout": gdp_vs_business_layout,
})

## There seems to be some relationship between the Ease of doing business index and GDP per capita.
## Let's investigate that further

In [80]:
# Narrow the combined frame to the two indicators being compared.
business_gdp = data.loc[:, ['Ease of doing business', 'Economy..GDP.per.Capita.']]
business_gdp.head()

Unnamed: 0,Ease of doing business,Economy..GDP.per.Capita.
Afghanistan,183,0.401477
Albania,65,0.996193
Algeria,166,1.09186
Angola,175,0.858428
Argentina,117,1.1853


In [86]:
# Pearson correlation matrix of the two indicators; the object-dtype columns
# are cast to float before correlating.
Business_vs_GDP = (
    business_gdp
    .astype(float)
    .corr(method='pearson', min_periods=10)
)
Business_vs_GDP

Unnamed: 0,Ease of doing business,Economy..GDP.per.Capita.
Ease of doing business,1.0,-0.757251
Economy..GDP.per.Capita.,-0.757251,1.0


## There is a correlation of -0.757 between the Ease of doing business index and GDP per capita
## Since 1 is the easiest rank, this means the lower (i.e. better) the Ease of doing business index, the higher the GDP per capita tends to be. Not really surprising, but nice to confirm it anyway

# Graphs

## Ease of doing business index

In [101]:
# Choropleth world map coloured by each country's 2017 ease-of-doing-business
# value.
#
# Countries are matched by their three-letter 'Country Code' (locationmode
# 'ISO-3') rather than by name: many names in this dataset are written
# differently from the conventional names plotly matches on, so name matching
# leaves those countries blank on the map.
# NOTE(review): the codes visible in the data (AFG, ALB, DZA, ...) are ISO-3;
# any non-standard codes in the file will simply not be drawn - confirm.
graph_world = [dict(
    type='choropleth',
    locations=business_plot['Country Code'],
    locationmode='ISO-3',
    z=business_plot['Ease of doing business'],
    text=business_plot['Country Name'],  # readable name for the hover label
    colorscale='RdBu',
    marker=dict(
        line=dict(
            color='rgb(180,180,180)',
            width=0.3,
        ),
    ),
    hoverinfo='z+text',  # hover shows the value and the country name
)]

# Fixed-size figure on a Mercator projection, with coastlines and no frame.
layout = dict(
    title='Ease of doing business rank',
    autosize=False,
    width=1000,
    height=900,
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection=dict(
            type='mercator',
        ),
    ),
)

plotly.offline.iplot(dict(data=graph_world, layout=layout))