# World Happiness Summary

In [1]:
import plotly
import plotly.graph_objects as go
import pandas as pd

import os
if not os.path.exists("images"):
    os.mkdir("images")


In [2]:
df_q1 = pd.read_csv("../../Data/Global_HI.csv")

df_q1 = df_q1.rename(columns={"Country": "country", "Happiness Index": "HI", "Code": "CODE"}) 

df_q1.head()


Unnamed: 0,country,CODE,Year,HI,Log GDP per capita,Social Support,Life expectancy,Freedom,Generosity,Perceptions of corruption,...,Security threats index,Brain drain index,Unemployment rate,Blue Index,Suicide,Depression & Anxiety,Substance Abuse,Physician perpop,Average Age,Sleep (min)
0,Afghanistan,AFG,2017,2.661718,7.472197,0.49088,52.799999,0.427011,-0.110382,0.954393,...,1.0,8.2,11.18,12.473027,5.84,4077.0,5.84,0.1743,18.8,
1,Albania,ALB,2017,4.639548,9.376133,0.637698,68.400002,0.749611,-0.029357,0.876135,...,4.7,7.6,13.75,11.829407,5.59,2733.0,5.59,1.1951,32.9,
2,Algeria,DZA,2017,5.248912,9.537937,0.806754,65.699997,0.43667,-0.18775,0.699774,...,7.2,5.8,12.0,,,,,,,
3,Argentina,ARG,2017,6.03933,9.849303,0.906699,68.599998,0.831966,-0.178522,0.841052,...,4.3,3.0,8.35,10.52255,11.64,4074.0,11.64,1.4053,31.7,418.0
4,Armenia,ARM,2017,4.287736,9.076206,0.697925,66.599998,0.613697,-0.13084,0.864683,...,5.5,6.2,17.7,11.377772,7.9,3181.0,7.9,2.6724,35.1,


### Visualizing Happiness Score

Need to rename some columns in order to merge properly

In [3]:
#figure loading

fig_Q1 = go.Figure(data=go.Choropleth(
    locations = df_q1['CODE'],
    z = df_q1['HI'],
    text = df_q1['country'],
    colorscale = 'Picnic',
    autocolorscale=False,
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    #colorbar_tickprefix = '',
    colorbar_title = 'Happiness<br>Index',))

#figure layout
fig_Q1.update_layout(
    title_text='2017 Global Happiness Index',
    geo=dict(
        showframe=True,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.25,
        y=-0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://en.wikipedia.org/wiki/World_Happiness_Report">\
            United Nations World Happiness Report</a>',
        showarrow = False
    )]
)

#globe
#fig.update_geos(projection_type="orthographic")
fig_Q1.update_geos(projection_type="natural earth")

fig_Q1.show()

In [4]:
#fig.write_image("images/fig1.jpg")

plotly.io.write_image(fig_Q1,"../../Images/Fig_Q1.png",format="png", scale=None, width=None, height=None, validate=True)

In [5]:
df_q6 = pd.read_csv("../../Data/Q6_v2.csv")

df_q6 = df_q6.rename(columns={"Country name": "country", "Happiness Index": "HI", "Code": "CODE"}) 

df_q6

Unnamed: 0,country,CODE,GDP,HI,Log GDP,Social support,Healthy life exp,Freedom,Generosity,Pct corruption,...,Rev tourism,Fragile index,Security index,brain drain,Unemployment rate,Continent,GDP-Happiness diff,Rank,Unemp-Happiness Diff,Rank.1
0,Finland,FIN,0.416375,1.0,0.824295,1.0,0.836806,0.946429,0.376623,0.037975,...,0.068402,0.156677,0.08046,0.133333,0.306622,Europe,-0.175705,101.0,-0.693378,111.0
1,Denmark,DNK,0.484193,0.961014,0.852495,0.980769,0.847222,0.946429,0.584416,0.025316,...,0.111899,0.18612,0.08046,0.08,0.197517,Europe,-0.108519,92.0,-0.763497,119.0
2,Netherlands,NLD,0.48672,0.935673,0.861171,0.961538,0.850694,0.875,0.701299,0.253165,...,0.308315,0.24816,0.057471,0.173333,0.163657,Europe,-0.074501,80.0,-0.772016,121.0
3,New Zealand,NZL,0.357889,0.910331,0.793926,0.980769,0.878472,0.910714,0.766234,0.075949,...,0.139386,0.197687,0.022989,0.106667,0.15839,Oceania,-0.116405,94.0,-0.751942,118.0
4,Botswana,BWA,0.146514,0.163743,0.62039,0.634615,0.364583,0.696429,0.051948,0.721519,...,0.006931,0.630915,0.356322,0.56,0.639955,Africa,0.456648,1.0,0.476212,3.0
5,Malawi,MWI,0.001706,0.148148,0.047722,0.230769,0.319444,0.75,0.38961,0.721519,...,0.00025,0.885384,0.436782,0.88,0.196388,Africa,-0.100426,88.0,0.04824,18.0
6,Tanzania,TZA,0.017522,0.134503,0.240781,0.519231,0.322917,0.660714,0.506494,0.620253,...,0.029635,0.047319,0.517241,0.84,0.057938,Africa,0.106278,35.0,-0.076565,27.0
7,Rwanda,RWA,0.009859,0.087719,0.156182,0.153846,0.444444,0.857143,0.441558,0.063291,...,0.00701,0.063091,0.597701,0.813333,0.021445,Africa,0.068463,46.0,-0.066275,25.0
8,Afghanistan,AFG,0.01211,0.0,0.138829,0.096154,0.177083,0.0,0.233766,1.0,...,0.0,0.141956,0.0,0.92,0.402182,Asia,0.138829,29.0,0.402182,4.0
9,Brazil,BRA,0.122107,0.7154,0.592191,0.884615,0.642361,0.589286,0.142857,0.797468,...,0.081157,0.677182,0.655172,0.44,0.463883,Americas,,,,


### Visualizing Happiness Index of Outlier Countries

Selecting particular countries

In [6]:
#figure loading

fig_Q6 = go.Figure(data=go.Choropleth(
    locations = df_q6['CODE'],
    z = df_q6['HI'],
    text = df_q6['country'],
    colorscale = 'Picnic',
    autocolorscale=False,
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    #colorbar_tickprefix = '',
    colorbar_title = 'Happiness<br>Index',))

#figure layout
fig_Q6.update_layout(
    title_text='Outlier Countries Happiness Index',
    geo=dict(
        showframe=True,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.25,
        y=-0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://en.wikipedia.org/wiki/World_Happiness_Report">\
            United Nations World Happiness Report</a>',
        showarrow = False
    )]
)

#globe
#fig.update_geos(projection_type="orthographic")
fig_Q6.update_geos(projection_type="natural earth")

fig_Q6.show()

In [7]:
plotly.io.write_image(fig_Q6,"../../Images/Fig_Q6.png",format="png", scale=None, width=None, height=None, validate=True)

In [8]:
df_q7 = pd.read_csv("../../Data/Q7_v2.csv")

df_q7 = df_q7.rename(columns={"Country name": "country", "Happiness Index": "HI", "Code": "CODE"}) 

df_q7

Unnamed: 0,country,CODE,GDP,HI,Log GDP,Social support,Healthy life exp,Freedom,Generosity,Pct corruption,...,Rev tourism,Fragile index,Security index,brain drain,Unemployment rate,Continent,GDP-Happiness diff,Rank,Unemp-Happiness Diff,Rank.1
0,Finland,FIN,0.416375,1.0,0.824295,1.0,0.836806,0.946429,0.376623,0.037975,...,0.068402,0.156677,0.08046,0.133333,0.306622,Europe,-0.175705,101.0,-0.693378,111.0
1,Denmark,DNK,0.484193,0.961014,0.852495,0.980769,0.847222,0.946429,0.584416,0.025316,...,0.111899,0.18612,0.08046,0.08,0.197517,Europe,-0.108519,92.0,-0.763497,119.0
2,Netherlands,NLD,0.48672,0.935673,0.861171,0.961538,0.850694,0.875,0.701299,0.253165,...,0.308315,0.24816,0.057471,0.173333,0.163657,Europe,-0.074501,80.0,-0.772016,121.0
3,Canada,CAN,0.426038,0.925926,0.83731,0.942308,0.892361,0.928571,0.584416,0.253165,...,0.268599,0.197687,0.137931,0.053333,0.22009,Americas,-0.088616,84.0,-0.705836,113.0
4,New Zealand,NZL,0.357889,0.910331,0.793926,0.980769,0.878472,0.910714,0.766234,0.075949,...,0.139386,0.197687,0.022989,0.106667,0.15839,Oceania,-0.116405,94.0,-0.751942,118.0
5,Botswana,BWA,0.146514,0.163743,0.62039,0.634615,0.364583,0.696429,0.051948,0.721519,...,0.006931,0.630915,0.356322,0.56,0.639955,Africa,0.456648,1.0,0.476212,3.0
6,Malawi,MWI,0.001706,0.148148,0.047722,0.230769,0.319444,0.75,0.38961,0.721519,...,0.00025,0.885384,0.436782,0.88,0.196388,Africa,-0.100426,88.0,0.04824,18.0
7,Tanzania,TZA,0.017522,0.134503,0.240781,0.519231,0.322917,0.660714,0.506494,0.620253,...,0.029635,0.047319,0.517241,0.84,0.057938,Africa,0.106278,35.0,-0.076565,27.0
8,Rwanda,RWA,0.009859,0.087719,0.156182,0.153846,0.444444,0.857143,0.441558,0.063291,...,0.00701,0.063091,0.597701,0.813333,0.021445,Africa,0.068463,46.0,-0.066275,25.0
9,Afghanistan,AFG,0.01211,0.0,0.138829,0.096154,0.177083,0.0,0.233766,1.0,...,0.0,0.141956,0.0,0.92,0.402182,Asia,0.138829,29.0,0.402182,4.0


### Visualizing Country Happiness Index plus Canada
Countries plus Canada

In [9]:
#figure loading

fig_Q7 = go.Figure(data=go.Choropleth(
    locations = df_q7['CODE'],
    z = df_q7['HI'],
    text = df_q7['country'],
    colorscale = 'Picnic',
    autocolorscale=False,
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    #colorbar_tickprefix = '',
    colorbar_title = 'Happiness<br>Index',))

#figure layout
fig_Q7.update_layout(
    title_text='Outlier Countries Happiness Index',
    geo=dict(
        showframe=True,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.25,
        y=-0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://en.wikipedia.org/wiki/World_Happiness_Report">\
            United Nations World Happiness Report</a>',
        showarrow = False
    )]
)

#globe
#fig.update_geos(projection_type="orthographic")
fig_Q7.update_geos(projection_type="natural earth")

fig_Q7.show()

In [10]:
plotly.io.write_image(fig_Q7,"../../Images/Fig_Q7.png",format="png", scale=None, width=None, height=None, validate=True)

# Correlation Analysis
We use normalized data for correlation. Why?

In [11]:
%matplotlib notebook
import plotly.express as px
import pandas as pd
#from matplotlib import pyplot as plt
from scipy.stats import linregress
#import numpy as np
import plotly.graph_objs as go
import statsmodels.api as sm

In [12]:
df_2017N = pd.read_csv("../../Data/Normalized Table 2017_df_v3.csv")
df_2017N_v2 = df_2017N[["Country name", "Code","Continent","Happiness Index", "Log GDP", "Social support", "Freedom", "Healthy life exp", "Generosity", "Sleep (min)", "Literacy rate", "Rev tourism","Average Age", "Tourist arrivals", "Blue Index", "Suicide", "Pct spent ed", "Depression & Anxiety", "Substance Abuse", "Physician perpop", "Unemployment rate", "Pct corruption", "Pct Poverty", "Fragile index ", "Security index", "brain drain" ]]
df_2017N_v3 = df_2017N_v2.rename(columns={"continent":"Continent","Happiness Index":"Happiness index", "Healthy life exp":"Life exp","Physician perpop": "Physician per pop","Substance Abuse": "Substance abuse", "Blue Index":"Blue index", "Pct spent ed":"% Spent edu", "Pct corruption":"% Corruption", "Pct Poverty":"% Poverty","Unemployment rate":"% Unemployment","brain drain": "Brain drain" })
df_2017N_v4 = df_2017N_v3[["Country name", "Code", "Continent", "Happiness index", "% Unemployment"]]
df_2017N_v4 = df_2017N_v4.dropna(how='any')
df_2017N_v4.head()

Unnamed: 0,Country name,Code,Continent,Happiness index,% Unemployment
0,Afghanistan,AFG,Asia,0.0,0.402182
1,Albania,ALB,Europe,0.385965,0.498871
2,Argentina,ARG,Americas,0.658869,0.295711
3,Armenia,ARM,Asia,0.317739,0.647479
4,Australia,AUS,Oceania,0.896686,0.191874


## Scatter Plot - GDP and Happiness Index

In [13]:
GDP = df_2017N_v3["Log GDP"]
HI = df_2017N_v3["Happiness index"]
color = ["royalblue","darkturquoise","crimson", "mediumpurple","mediumseagreen"]


#scatter plot
fig_Q2a_1 = px.scatter(df_2017N_v3, GDP, HI, hover_name="Country name"
           ,template="simple_white",color_discrete_sequence=color, color="Continent")


#update layout
fig_Q2a_1.update_layout(title_text="GDP and Happiness Index"
                      #,annotations= [dict(x=0.05, y=1, text=line_eq, showarrow = False)]
                      ,width=750, height=650
                     )


fig_Q2a_1.show()


## Scatter Plot - GDP and Happiness Index Regression
Happiness Index is positively correlated with Log GDP

In [14]:
GDP = df_2017N_v3["Log GDP"]
HI = df_2017N_v3["Happiness index"]
color = ["royalblue","darkturquoise","crimson", "mediumpurple","mediumseagreen"]


#calculate y-values for regression line
(slope, intercept, rvalue, pvalue, stderr) = linregress(GDP, HI)
regress_values = GDP * slope + intercept
value = regress_values
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#scatter plot
fig_Q2a_2 = px.scatter(df_2017N_v3, GDP, HI, hover_name="Country name"
           ,template="simple_white",color_discrete_sequence=color, color="Continent")

#regression line
regline_a = sm.OLS(value,sm.add_constant(GDP)).fit().fittedvalues

#add regression line to scatter
fig_Q2a_2.add_traces(go.Scatter(x=GDP, y=regline_a, mode='lines', marker_color='dimgrey', name='Regression'))

#update layout
fig_Q2a_2.update_layout(title_text="GDP and Happiness Index are Positively Correlated"
                      ,annotations= [dict(x=0.05, y=1, text=line_eq, showarrow = False)]
                      ,width=750, height=650
                     )


fig_Q2a_2.show()



Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.



In [15]:
fig_Q2a_1.write_image("../../images/fig_Q2a_1.png")
fig_Q2a_2.write_image("../../images/fig_Q2a_2.png")

### Correlation Between Unemployment and Happiness Index

In [16]:
#Correlation of Unemployment with the Happiness Index


unemp = df_2017N_v4["% Unemployment"]
HI = df_2017N_v4["Happiness index"]
color = ["royalblue","darkturquoise","crimson", "mediumpurple","mediumseagreen"]

#scatter plot
fig_Q2b_1 = px.scatter(df_2017N_v4, unemp, HI, hover_name="Country name"
           ,template="simple_white",color_discrete_sequence=color, color="Continent")

#update layout
fig_Q2b_1.update_layout(title_text="% Unemployment and Happiness Index"
                      #,annotations= [dict(x=0.05, y=1, text=line_eq, showarrow = False)]
                      ,width=750, height=650
                     )


fig_Q2b_1.show()

### Correlation Between Unemployment and Happiness Index, Regression

Unemployment % has weaker negative correlation with Happiness Index

In [17]:
#Correlation of Unemployment with the Happiness Index


unemp = df_2017N_v4["% Unemployment"]
HI = df_2017N_v4["Happiness index"]
color = ["royalblue","darkturquoise","crimson", "mediumpurple","mediumseagreen"]


#calculate y-values for regression line
(slope, intercept, rvalue, pvalue, stderr) = linregress(unemp, HI)
regress_values = unemp * slope + intercept
value = regress_values
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#scatter plot
fig_Q2b_2 = px.scatter(df_2017N_v4, unemp, HI, hover_name="Country name"
           ,template="simple_white",color_discrete_sequence=color, color="Continent")

#regression line
regline_b = sm.OLS(value,sm.add_constant(unemp)).fit().fittedvalues

#add regression line to scatter
fig_Q2b_2.add_traces(go.Scatter(x=unemp, y=regline_b, mode='lines', marker_color='dimgrey', name='Regression'))

fig_Q2b_2.update_layout(title_text="% Unemployment and Happiness Index are Negatively Correlated"
                      ,annotations= [dict(x=0.05, y=1, text=line_eq, showarrow = False)]
                      ,width=750, height=650
                     )


fig_Q2b_2.show()

In [18]:
fig_Q2b_1.write_image("../../images/fig_Q2b_1.png")
fig_Q2b_2.write_image("../../images/fig_Q2b_2.png")

# Correlation of Different Factors to Happiness Index
1. Load different files
2. Renaming some so it can be used together


In [53]:
%matplotlib notebook
import plotly.express as px
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np

In [54]:
df_2017 = pd.read_csv("../../Data/df_2017.csv")
df_2017.head()

Unnamed: 0.1,Unnamed: 0,Country name,Code,year,GDP,Happiness Index,Log GDP,Social support,Healthy life exp,Freedom,...,Pct Poverty,Pct spent ed,Literacy rate,Tourist arrivals,Rev tourism,Fragile index,Security index,brain drain,Unemployment rate,continent
0,0,Afghanistan,AFG,2017,2203.0,2.66,7.47,0.49,52.8,0.43,...,,15.66,,,16.0,17.3,1.0,8.2,11.18,Asia
1,1,Albania,ALB,2017,13094.0,4.64,9.38,0.64,68.4,0.75,...,,12.39,,4643.0,2050.0,6.5,4.7,7.6,13.75,Europe
2,2,Argentina,ARG,2017,23563.0,6.04,9.85,0.91,68.6,0.83,...,25.7,13.26,,6711.0,5835.0,48.2,4.3,3.0,8.35,Americas
3,3,Armenia,ARM,2017,12115.0,4.29,9.08,0.7,66.6,0.61,...,25.7,10.4,99.74,1495.0,1140.0,71.0,5.5,6.2,17.7,Asia
4,4,Australia,AUS,2017,48905.0,7.26,10.71,0.95,73.3,0.91,...,,,,8815.0,43975.0,22.3,2.1,1.3,5.59,Oceania


In [55]:
df_2017_v2 = df_2017[["Country name", "Code","continent", "year","Happiness Index", "GDP", "Social support", "Freedom", "Healthy life exp", "Generosity", "Sleep (min)", "Literacy rate", "Rev tourism","Average Age", "Tourist arrivals", "Blue Index", "Suicide", "Pct spent ed", "Depression & Anxiety", "Substance Abuse", "Physician perpop", "Unemployment rate", "Pct corruption", "Pct Poverty", "Fragile index ", "Security index", "brain drain" ]]
df_2017_v2.head()

Unnamed: 0,Country name,Code,continent,year,Happiness Index,GDP,Social support,Freedom,Healthy life exp,Generosity,...,Pct spent ed,Depression & Anxiety,Substance Abuse,Physician perpop,Unemployment rate,Pct corruption,Pct Poverty,Fragile index,Security index,brain drain
0,Afghanistan,AFG,Asia,2017,2.66,2203.0,0.49,0.43,52.8,-0.11,...,15.66,4279.0,521.797725,2.4356,11.18,0.95,,17.3,1.0,8.2
1,Albania,ALB,Europe,2017,4.64,13094.0,0.64,0.75,68.4,-0.03,...,12.39,2898.0,1080.73087,2.4356,13.75,0.88,,6.5,4.7,7.6
2,Argentina,ARG,Americas,2017,6.04,23563.0,0.91,0.83,68.6,-0.18,...,13.26,4117.0,851.924597,2.4356,8.35,0.84,25.7,48.2,4.3,3.0
3,Armenia,ARM,Asia,2017,4.29,12115.0,0.7,0.61,66.6,-0.13,...,10.4,3296.0,1161.792104,3.9901,17.7,0.86,25.7,71.0,5.5,6.2
4,Australia,AUS,Oceania,2017,7.26,48905.0,0.95,0.91,73.3,0.31,...,,5396.0,810.016527,4.4023,5.59,0.41,,22.3,2.1,1.3


In [56]:
df_2017_v3 = df_2017_v2.rename(columns={"continent":"Continent","Happiness Index":"Happiness index", "year":"Year", "Healthy life exp":"Life exp","Physician perpop": "Physician per pop","Substance Abuse": "Substance abuse", "Blue Index":"Blue index", "Pct spent ed":"% Spent edu", "Pct corruption":"% Corruption", "Pct Poverty":"% Poverty","Unemployment rate":"% Unemployment","brain drain": "Brain drain" })
df_2017_v3.head()

Unnamed: 0,Country name,Code,Continent,Year,Happiness index,GDP,Social support,Freedom,Life exp,Generosity,...,% Spent edu,Depression & Anxiety,Substance abuse,Physician per pop,% Unemployment,% Corruption,% Poverty,Fragile index,Security index,Brain drain
0,Afghanistan,AFG,Asia,2017,2.66,2203.0,0.49,0.43,52.8,-0.11,...,15.66,4279.0,521.797725,2.4356,11.18,0.95,,17.3,1.0,8.2
1,Albania,ALB,Europe,2017,4.64,13094.0,0.64,0.75,68.4,-0.03,...,12.39,2898.0,1080.73087,2.4356,13.75,0.88,,6.5,4.7,7.6
2,Argentina,ARG,Americas,2017,6.04,23563.0,0.91,0.83,68.6,-0.18,...,13.26,4117.0,851.924597,2.4356,8.35,0.84,25.7,48.2,4.3,3.0
3,Armenia,ARM,Asia,2017,4.29,12115.0,0.7,0.61,66.6,-0.13,...,10.4,3296.0,1161.792104,3.9901,17.7,0.86,25.7,71.0,5.5,6.2
4,Australia,AUS,Oceania,2017,7.26,48905.0,0.95,0.91,73.3,0.31,...,,5396.0,810.016527,4.4023,5.59,0.41,,22.3,2.1,1.3


In [57]:
df_corr = df_2017_v3.corr()
corr_barh = df_corr["Happiness index"].plot(kind="barh",width=0.85,figsize=(12, 6),color="lightcoral")

corr_barh.set_ylabel("Variable", size=10)
corr_barh.set_xlabel("Correlation", size=10)
plt.tight_layout()
plt.savefig("../../images/Fig_Q4.png")
plt.show()





<IPython.core.display.Javascript object>

# Different Happiness Factors on Different Countries

In [24]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib notebook

#import normalized score
normal_path=("../../Data/Normalized Table 2017_df.csv")
df_normal = pd.read_csv(normal_path)


df_normal.rename(columns={"brain drain":"Brain Drain","Security index":"Security Threat", "Pct Corruption":"Perception of Corruption"}, inplace=True)


### Happiness Index Factors on Outlier Countries with "Better" Economy But Low Happiness (Exploration)

Using combination of log GDP and unemployment rate

In [25]:
#Preparing the data
country_list=["South Korea","Japan","Rwanda","Tanzania","Ukraine","Iraq"]
variable_list= ["Log GDP","Social support","Healthy life exp","Freedom" ]


country0=[]
country0.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )
country0.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[2]].item() )
country0.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[3]].item() )

country1 = []
country1.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )
country1.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[2]].item() )
country1.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[3]].item() )

country2 = []
country2.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[0]].item() )
country2.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[1]].item() )
country2.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[2]].item() )
country2.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[3]].item() )

country3 =[]
country3.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
country3.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )
country3.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[2]].item() )
country3.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[3]].item() )

country4 = []
country4.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[0]].item() )
country4.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[1]].item() )
country4.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[2]].item() )
country4.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[3]].item() )

country5 = []
country5.append(df_normal[df_normal["Country name"]==country_list[5]][variable_list[0]].item() )
country5.append(df_normal[df_normal["Country name"]==country_list[5]][variable_list[1]].item() )
country5.append(df_normal[df_normal["Country name"]==country_list[5]][variable_list[2]].item() )
country5.append(df_normal[df_normal["Country name"]==country_list[5]][variable_list[3]].item() )

country0


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and wi

[0.7939262472885034,
 0.7115384615384616,
 0.8888888888888887,
 0.19642857142857148]

In [26]:

N = len(country0) 

fig, ax = plt.subplots(figsize = (12,6))

color = ["lightskyblue","darkturquoise","mediumseagreen","blue","mediumpurple","darkviolet","lightcoral","crimson"]

ind = np.arange(N)    # the x locations for the groups
width = 0.13        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0, width, bottom=0,  label=country_list[0], color = color[0])
ax.bar(ind + width*1, country1, width, bottom=0, label=country_list[1],color = color[1])
ax.bar(ind + width*2, country2, width, bottom=0, label=country_list[2], color = color[2])
ax.bar(ind + width*3, country3, width, bottom=0, label=country_list[3], color = color[3])
ax.bar(ind + width*4, country4, width, bottom=0, label=country_list[4], color = color[4])
ax.bar(ind + width*5, country5, width, bottom=0, label=country_list[5], color = color[5])


ax.set_title('How Do These Outliers Perform on Happiness Index Factors?')
ax.set_xticks(ind + width * 2) #The location of label
ax.set_xticklabels((variable_list[0], variable_list[1],  variable_list[2],  variable_list[3] ))

ax.legend(loc="best" )



plt.xlabel("Variables")
plt.ylabel("Normalized Scores")
plt.savefig(f"../../Images/Happiness Index Factor - Rich.png")
plt.tight_layout()
plt.show()



<IPython.core.display.Javascript object>

### Negative Factors on Outlier Countries with "Better" Economy, Lower Happiness (Exploration)

In [27]:
df_normal.head()
#df_normal = df_normal.rename(columns={"brain drain":"Brain drain","Security index":"Security Threat", "Pct Corruption":"Perception of Corruption"}, inplace=True)


Unnamed: 0,Country name,Code,GDP,Happiness Index,Log GDP,Social support,Healthy life exp,Freedom,Generosity,Pct corruption,...,Pct Poverty,Pct spent ed,Literacy rate,Tourist arrivals,Rev tourism,Fragile State index,Security Threat,Brain drain,Unemployment rate,Blue Index
0,Afghanistan,AFG,0.01211,0.0,0.138829,0.096154,0.177083,0.0,0.233766,1.0,...,,0.375431,,,0.0,0.958874,1.0,0.92,0.402182,0.219939
1,Albania,ALB,0.109372,0.385965,0.553145,0.384615,0.71875,0.571429,0.337662,0.911392,...,,0.234483,,0.053397,0.008087,0.452381,0.411111,0.84,0.498871,0.174066
2,Argentina,ARG,0.202865,0.658869,0.655098,0.903846,0.725694,0.714286,0.142857,0.860759,...,0.344675,0.271983,,0.077236,0.023135,0.319264,0.366667,0.226667,0.295711,0.335137
3,Armenia,ARM,0.100629,0.317739,0.488069,0.5,0.65625,0.321429,0.207792,0.886076,...,0.344675,0.148707,0.999029,0.017107,0.004469,0.566017,0.5,0.653333,0.647479,0.31737
4,Australia,AUS,0.429181,0.896686,0.841649,0.980769,0.888889,0.857143,0.779221,0.316456,...,,,,0.101491,0.174768,0.038961,0.122222,0.0,0.191874,0.218605


In [28]:
#Preparing the data

country_list=["South Korea","Japan","Rwanda","Tanzania","Ukraine","Iraq"]
variable_list= ["Brain drain","Security Threat" ]



country0n=[]
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )


country1n = []
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )

country2n = []
country2n.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[0]].item() )
country2n.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[1]].item() )


country3n =[]
country3n.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
country3n.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )


country4n = []
country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[0]].item() )
country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[1]].item() )


country5n = []
country5n.append(df_normal[df_normal["Country name"]==country_list[5]][variable_list[0]].item() )
country5n.append(df_normal[df_normal["Country name"]==country_list[5]][variable_list[1]].item() )





`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version



In [29]:

N = len(country0n) #number of variables to be tested



fig, ax = plt.subplots(figsize = (7,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.1        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0n, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1n, width, bottom=0, label=country_list[1],color = color[1])
ax.bar(ind + width*2, country2n, width, bottom=0, label=country_list[2],color = color[2])
ax.bar(ind + width*3, country3n, width, bottom=0, label=country_list[3],color = color[3])
ax.bar(ind + width*4, country4n, width, bottom=0, label=country_list[4],color = color[4])
ax.bar(ind + width*5, country5n, width, bottom=0, label=country_list[5],color = color[5])





ax.set_title('Top Factors Correlated with Happiness')
ax.set_xticks(ind + width * 2) 
ax.set_xticklabels((variable_list[0], variable_list[1] ))

ax.legend(loc='upper right')

plt.tight_layout()
plt.xlabel("Variables")
plt.ylabel("Normalized Scores")
plt.savefig(f"../../Images/Happiness Outside Negative Factor - Rich.png")
plt.show()

<IPython.core.display.Javascript object>

### How Do These Countries Compare to Canada? (Exploration)

In [30]:
#import all Happiness Score
all_path=("../../Data/New_HappyIndex_mx.csv")
df_all = pd.read_csv(all_path)
df_all

Unnamed: 0,Country name,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,Afghanistan,3.72,3.72,4.40,4.76,3.83,3.78,3.57,3.13,3.98,4.22,2.66
1,Albania,4.63,5.49,5.49,5.27,5.87,5.51,4.55,4.81,4.61,4.51,4.64
2,Argentina,6.07,5.96,6.42,6.44,6.78,6.47,6.58,6.67,6.70,6.43,6.04
3,Armenia,4.88,4.65,4.18,4.37,4.26,4.32,4.28,4.45,4.35,4.33,4.29
4,Australia,7.29,7.25,7.45,7.45,7.41,7.20,7.36,7.29,7.31,7.25,7.26
...,...,...,...,...,...,...,...,...,...,...,...,...
123,Venezuela,6.26,6.26,7.19,7.48,6.58,7.07,6.55,6.14,5.57,4.04,5.07
124,Vietnam,5.42,5.48,5.30,5.30,5.77,5.53,5.02,5.08,5.08,5.06,5.18
125,Yemen,4.48,4.81,4.81,4.35,3.75,4.06,4.22,3.97,2.98,3.83,3.25
126,Zambia,4.00,4.73,5.26,5.00,5.00,5.01,5.24,4.35,4.84,4.35,3.93


In [31]:
df_normal.rename(columns={"brain drain":"Brain Drain","Security index":"Security Threat", "Pct Corruption":"Perception of Corruption"}, inplace=True)


country0= df_all[ df_all["Country name"] == country_list[0] ]
country0= country0.set_index("Country name").transpose().rename(columns={country_list[0]:"Happiness Index" } )
country0["Country"] = country_list[0]

country1= df_all[ df_all["Country name"] == country_list[1] ]
country1= country1.set_index("Country name").transpose().rename(columns={country_list[1]:"Happiness Index" } )
country1["Country"] = country_list[1]

country2= df_all[ df_all["Country name"] == country_list[2] ]
country2= country2.set_index("Country name").transpose().rename(columns={country_list[2]:"Happiness Index" } )
country2["Country"] = country_list[2]

country3= df_all[ df_all["Country name"] == country_list[3] ]
country3= country3.set_index("Country name").transpose().rename(columns={country_list[3]:"Happiness Index" } )
country3["Country"] = country_list[3]

country4= df_all[ df_all["Country name"] == country_list[4] ]
country4= country4.set_index("Country name").transpose().rename(columns={country_list[4]:"Happiness Index" } )
country4["Country"] = country_list[4]

country5= df_all[ df_all["Country name"] == country_list[5] ]
country5= country5.set_index("Country name").transpose().rename(columns={country_list[5]:"Happiness Index" } )
country5["Country"] = country_list[5]

country6=[]
country6= df_all[ df_all["Country name"] == "Canada" ]
country6= country6.set_index("Country name").transpose().rename(columns={"Canada":"Happiness Index" } )
country6["Country"] = "Canada"

country6

Country name,Happiness Index,Country
2007,7.48,Canada
2008,7.49,Canada
2009,7.49,Canada
2010,7.65,Canada
2011,7.43,Canada
2012,7.42,Canada
2013,7.59,Canada
2014,7.3,Canada
2015,7.41,Canada
2016,7.24,Canada


In [32]:
plt.figure(figsize=(8,7))

plt.plot(country0.index, country0["Happiness Index"], label= country_list[0], color = color[0] )
plt.plot(country1.index, country1["Happiness Index"], label= country_list[1], color = color[1] )
plt.plot(country2.index, country2["Happiness Index"], label= country_list[2], color = color[2] )
plt.plot(country3.index, country3["Happiness Index"], label= country_list[3], color = color[3] )
plt.plot(country4.index, country4["Happiness Index"], label= country_list[4], color = color[4] )
plt.plot(country5.index, country5["Happiness Index"], label= country_list[5], color = color[5] )
plt.bar(country6.index, country6["Happiness Index"], label= "Canada", color = color[6],alpha = 0.4 )


plt.legend(loc="best" )
plt.xlabel("Year")
plt.ylabel("Happiness Index")
plt.title("Happiness Overtime")

plt.savefig("../../Images/Rich Line with Canada.png")
plt.grid(False)

<IPython.core.display.Javascript object>

### Happiness Overtime for Countries with "Better" Economy, Lower Happiness

In [33]:
plt.figure(figsize=(8,7))

plt.plot(country0.index, country0["Happiness Index"], label= country_list[0], color = color[0] )
plt.plot(country1.index, country1["Happiness Index"], label= country_list[1], color = color[1] )
plt.plot(country2.index, country2["Happiness Index"], label= country_list[2], color = color[2] )
plt.plot(country3.index, country3["Happiness Index"], label= country_list[3], color = color[3] )
plt.plot(country4.index, country4["Happiness Index"], label= country_list[4], color = color[4] )
plt.plot(country5.index, country5["Happiness Index"], label= country_list[5], color = color[5] )


plt.legend(loc="best")
plt.xlabel("Year")
plt.ylabel("Happiness Index")
plt.title("Happiness Overtime")
plt.savefig("../../Images/Rich Line.png")
plt.grid(False)

<IPython.core.display.Javascript object>

In [34]:
df_normal = df_normal.rename(columns={"brain drain":"Brain Drain","Security index":"Security Threat", "Pct corruption":"Perception of Corruption"})

df_normal.head()

Unnamed: 0,Country name,Code,GDP,Happiness Index,Log GDP,Social support,Healthy life exp,Freedom,Generosity,Perception of Corruption,...,Pct Poverty,Pct spent ed,Literacy rate,Tourist arrivals,Rev tourism,Fragile State index,Security Threat,Brain drain,Unemployment rate,Blue Index
0,Afghanistan,AFG,0.01211,0.0,0.138829,0.096154,0.177083,0.0,0.233766,1.0,...,,0.375431,,,0.0,0.958874,1.0,0.92,0.402182,0.219939
1,Albania,ALB,0.109372,0.385965,0.553145,0.384615,0.71875,0.571429,0.337662,0.911392,...,,0.234483,,0.053397,0.008087,0.452381,0.411111,0.84,0.498871,0.174066
2,Argentina,ARG,0.202865,0.658869,0.655098,0.903846,0.725694,0.714286,0.142857,0.860759,...,0.344675,0.271983,,0.077236,0.023135,0.319264,0.366667,0.226667,0.295711,0.335137
3,Armenia,ARM,0.100629,0.317739,0.488069,0.5,0.65625,0.321429,0.207792,0.886076,...,0.344675,0.148707,0.999029,0.017107,0.004469,0.566017,0.5,0.653333,0.647479,0.31737
4,Australia,AUS,0.429181,0.896686,0.841649,0.980769,0.888889,0.857143,0.779221,0.316456,...,,,,0.101491,0.174768,0.038961,0.122222,0.0,0.191874,0.218605


# Happiness Index Factors on "Poorer", But Happier Countries

In [35]:
#Preparing the data

country_list=["Costa Rica","Spain","Brazil","Nicaragua"]
variable_list= ["Log GDP","Social support","Healthy life exp","Freedom","Perception of Corruption","Generosity" ]


country0b=[]
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[2]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[3]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[4]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[5]].item() )

country1b = []
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[2]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[3]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[4]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[5]].item() )

country2b = []
country2b.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[0]].item() )
country2b.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[1]].item() )
country2b.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[2]].item() )
country2b.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[3]].item() )
country2b.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[4]].item() )
country2b.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[5]].item() )

country3b =[]
country3b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
country3b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )
country3b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[2]].item() )
country3b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[3]].item() )
country3b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[4]].item() )
country3b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[5]].item() )

#country4b =[]
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
##country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[2]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[3]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[4]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[5]].item() )




`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and wi

#### Python doesn't have multiple barchart. We performed this using subplot tricks

In [36]:

N = len(country0b) 

fig, ax = plt.subplots(figsize = (10,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.15        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0b, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1b, width, bottom=0, label=country_list[1],color = color[1])
ax.bar(ind + width*2, country2b, width, bottom=0, label=country_list[2],color = color[2])
ax.bar(ind + width*3, country3b, width, bottom=0, label=country_list[3],color = color[3])
#ax.bar(ind + width*4, country4b, width, bottom=0, label=country_list[4],color = color[4])
#ax.bar(ind + width*4, country4b, width, bottom=0, label=country_list[5],color = color[5])

ax.set_title('How Do Selected Countries Perform on Happiness Index Factors?')
ax.set_xticks(ind + width * 2) #The location of label
ax.set_xticklabels((variable_list[0], variable_list[1],  variable_list[2],  variable_list[3],variable_list[4],variable_list[5] ))

ax.legend(loc='upper right')


plt.xlabel("Variables")
plt.ylabel("Normalized Scores")
plt.savefig(f"../../Images/Happiness Index Factor - Poor.png")
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

### Negative Factors on "Poorer", But Happier Countries

In [37]:
#Preparing the data


variable_list= ["Brain drain","Security Threat" ]

country0n=[]
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )


country1n = []
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )

country2n = []
country2n.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[0]].item() )
country2n.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[1]].item() )


country3n =[]
country3n.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
country3n.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )

#country4n =[]
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[0]].item() )
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[1]].item() )



country3n



`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version



[0.9066666666666664, 0.5111111111111111]

In [38]:

N = len(country0n) #number of variables to be tested



fig, ax = plt.subplots(figsize = (6,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.14        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0n, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1n, width, bottom=0, label=country_list[1],color = color[1])
ax.bar(ind + width*2, country2n, width, bottom=0, label=country_list[2],color = color[2])
ax.bar(ind + width*3, country3n, width, bottom=0, label=country_list[3],color = color[3])
#ax.bar(ind + width*4, country4n, width, bottom=0, label=country_list[4],color = color[4])

##add additional legends here##

#women_std = (30*cm, 25*cm, 20*cm, 31*cm, 22*cm)



ax.set_title('Top Factors Correlated with Unhappiness')
ax.set_xticks(ind + width * 2) #The location of G1, G2 label
ax.set_xticklabels((variable_list[0], variable_list[1] ))

ax.legend(loc='upper right')
#ax.yaxis.set_units(inch)
plt.tight_layout()
plt.xlabel("Variables", labelpad=1)
plt.ylabel("Normalized Scores", labelpad=1)
plt.savefig(f"../../Images/Happiness Outside Negative Factor - Poor.png")
plt.show()

<IPython.core.display.Javascript object>

### Positive Factors on "Poorer", But Happier Countries

In [39]:
#Preparing the data


variable_list= ["Rev tourism","Average age" ]

country0n=[]
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )


country1n = []
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )

country2n = []
country2n.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[0]].item() )
country2n.append(df_normal[df_normal["Country name"]==country_list[2]][variable_list[1]].item() )


country3n =[]
country3n.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
country3n.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )

#country4n =[]
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[0]].item() )
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[1]].item() )



country3n



`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version



[0.0032799529277058623, 0.09404388714733536]

In [40]:

N = len(country0n) #number of variables to be tested



fig, ax = plt.subplots(figsize = (6,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.14        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0n, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1n, width, bottom=0, label=country_list[1],color = color[1])
ax.bar(ind + width*2, country2n, width, bottom=0, label=country_list[2],color = color[2])
ax.bar(ind + width*3, country3n, width, bottom=0, label=country_list[3],color = color[3])
#ax.bar(ind + width*4, country4n, width, bottom=0, label=country_list[4],color = color[4])

##add additional legends here##

#women_std = (30*cm, 25*cm, 20*cm, 31*cm, 22*cm)



ax.set_title('Top Factors Correlated with Happiness')
ax.set_xticks(ind + width) #The location of G1, G2 label
ax.set_xticklabels((variable_list[0], variable_list[1] ))

ax.legend(loc='upper right')
#ax.yaxis.set_units(inch)
plt.tight_layout()
plt.xlabel("Variables",labelpad=1)
plt.ylabel("Normalized Scores", labelpad=1)
plt.savefig(f"../../Images/Happiness Outside Positive Factor - Poor.png")
plt.show()



<IPython.core.display.Javascript object>

### Happiness Overtime

In [41]:
#import all Happiness Score
all_path=("../../Data/New_HappyIndex_mx.csv")
df_all = pd.read_csv(all_path)
df_all

Unnamed: 0,Country name,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,Afghanistan,3.72,3.72,4.40,4.76,3.83,3.78,3.57,3.13,3.98,4.22,2.66
1,Albania,4.63,5.49,5.49,5.27,5.87,5.51,4.55,4.81,4.61,4.51,4.64
2,Argentina,6.07,5.96,6.42,6.44,6.78,6.47,6.58,6.67,6.70,6.43,6.04
3,Armenia,4.88,4.65,4.18,4.37,4.26,4.32,4.28,4.45,4.35,4.33,4.29
4,Australia,7.29,7.25,7.45,7.45,7.41,7.20,7.36,7.29,7.31,7.25,7.26
...,...,...,...,...,...,...,...,...,...,...,...,...
123,Venezuela,6.26,6.26,7.19,7.48,6.58,7.07,6.55,6.14,5.57,4.04,5.07
124,Vietnam,5.42,5.48,5.30,5.30,5.77,5.53,5.02,5.08,5.08,5.06,5.18
125,Yemen,4.48,4.81,4.81,4.35,3.75,4.06,4.22,3.97,2.98,3.83,3.25
126,Zambia,4.00,4.73,5.26,5.00,5.00,5.01,5.24,4.35,4.84,4.35,3.93


In [42]:


country0= df_all[ df_all["Country name"] == country_list[0] ]
country0= country0.set_index("Country name").transpose().rename(columns={country_list[0]:"Happiness Index" } )
country0["Country"] = country_list[0]

country1= df_all[ df_all["Country name"] == country_list[1] ]
country1= country1.set_index("Country name").transpose().rename(columns={country_list[1]:"Happiness Index" } )
country1["Country"] = country_list[1]

country2= df_all[ df_all["Country name"] == country_list[2] ]
country2= country2.set_index("Country name").transpose().rename(columns={country_list[2]:"Happiness Index" } )
country2["Country"] = country_list[2]

country3= df_all[ df_all["Country name"] == country_list[3] ]
country3= country3.set_index("Country name").transpose().rename(columns={country_list[3]:"Happiness Index" } )
country3["Country"] = country_list[3]

#country4= df_all[ df_all["Country name"] == country_list[4] ]
#country4= country4.set_index("Country name").transpose().rename(columns={country_list[4]:"Happiness Index" } )
#country4["Country"] = country_list[4]

#country5= df_all[ df_all["Country name"] == country_list[5] ]
#country5= country5.set_index("Country name").transpose().rename(columns={country_list[5]:"Happiness Index" } )
#country5["Country"] = country_list[5]

country6=[]
country6= df_all[ df_all["Country name"] == "Canada" ]
country6= country6.set_index("Country name").transpose().rename(columns={"Canada":"Happiness Index" } )
country6["Country"] = "Canada"

country5

Country name,Happiness Index,Country
2007,4.59,Iraq
2008,4.59,Iraq
2009,4.78,Iraq
2010,5.07,Iraq
2011,4.73,Iraq
2012,4.66,Iraq
2013,4.73,Iraq
2014,4.54,Iraq
2015,4.49,Iraq
2016,4.41,Iraq


### Chart with Canada

In [43]:
plt.figure(figsize=(8,7))

plt.plot(country0.index, country0["Happiness Index"], label= country_list[0],color = color[0] )
plt.plot(country1.index, country1["Happiness Index"], label= country_list[1],color = color[1] )
plt.plot(country2.index, country2["Happiness Index"], label= country_list[2],color = color[2] )
plt.plot(country3.index, country3["Happiness Index"], label= country_list[3],color = color[3] )
#plt.plot(country4.index, country4["Happiness Index"], label= country_list[4],color = color[4] )
#plt.plot(country4.index, country4["Happiness Index"], label= country_list[4] )
#plt.plot(country5.index, country5["Happiness Index"], label= country_list[5] )
plt.bar(country6.index, country6["Happiness Index"], label= "Canada", color = color[6], alpha=0.4 )

plt.legend(loc="best" )
plt.xlabel("Year")
plt.ylabel("Happiness Index")
plt.title("Happiness Overtime")
plt.savefig("../Images/Poor Line with Canada.jpg")
plt.grid(False)

<IPython.core.display.Javascript object>

### Chart without Canada

In [44]:
plt.figure(figsize=(8,7))

plt.plot(country0.index, country0["Happiness Index"], label= country_list[0],color = color[0] )
plt.plot(country1.index, country1["Happiness Index"], label= country_list[1],color = color[1] )
plt.plot(country2.index, country2["Happiness Index"], label= country_list[2],color = color[2] )
plt.plot(country3.index, country3["Happiness Index"], label= country_list[3],color = color[3] )
#plt.plot(country4.index, country4["Happiness Index"], label= country_list[4],color = color[4] )
#plt.plot(country4.index, country4["Happiness Index"], label= country_list[4] )
#plt.plot(country5.index, country5["Happiness Index"], label= country_list[5] )
#plt.bar(country6.index, country5["Happiness Index"], label= "Canada", color = color[6] )

plt.legend(loc="best" )
plt.xlabel("Year")
plt.ylim(0,8)
plt.ylabel("Happiness Index")
plt.title("Happiness Overtime")
plt.savefig("../Images/Poor Line.jpg")
plt.grid(False)

<IPython.core.display.Javascript object>

# Extra: Canada and USA

In [45]:
#Preparing the data

country_list=["Canada","United States"]
variable_list= ["Log GDP","Social support","Healthy life exp","Freedom","Perception of Corruption","Generosity" ]


country0b=[]
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[2]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[3]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[4]].item() )
country0b.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[5]].item() )

country1b = []
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[2]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[3]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[4]].item() )
country1b.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[5]].item() )


#country4b =[]
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[0]].item() )
##country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[1]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[2]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[3]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[4]].item() )
#country4b.append(df_normal[df_normal["Country name"]==country_list[3]][variable_list[5]].item() )




`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version



### Happiness Index Factors Canada vs USA

In [46]:

N = len(country0b) 

fig, ax = plt.subplots(figsize = (10,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.15        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0b, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1b, width, bottom=0, label=country_list[1],color = color[1])
#ax.bar(ind + width*2, country2b, width, bottom=0, label=country_list[2],color = color[2])
#ax.bar(ind + width*3, country3b, width, bottom=0, label=country_list[3],color = color[3])
#ax.bar(ind + width*4, country4b, width, bottom=0, label=country_list[4],color = color[4])
#ax.bar(ind + width*4, country4b, width, bottom=0, label=country_list[5],color = color[5])

ax.set_title('How Do Selected Countries Perform on Happiness Index Factors?')
ax.set_xticks(ind + width ) #The location of label
ax.set_xticklabels((variable_list[0], variable_list[1],  variable_list[2],  variable_list[3],variable_list[4],variable_list[5] ))

ax.legend(loc='upper right')


plt.xlabel("Variables")
plt.ylabel("Normalized Scores")
plt.savefig(f"../../Images/Happiness Index Factor - CANUSA.png")
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

### Negative Factors, Canada vs USA

In [47]:
#Preparing the data


variable_list= ["Brain drain","Security Threat" ]

country0n=[]
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )


country1n = []
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )


#country4n =[]
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[0]].item() )
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[1]].item() )



country3n



`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version



[0.0032799529277058623, 0.09404388714733536]

In [48]:

N = len(country0n) #number of variables to be tested



fig, ax = plt.subplots(figsize = (6,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.14        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0n, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1n, width, bottom=0, label=country_list[1],color = color[1])
#ax.bar(ind + width*2, country2n, width, bottom=0, label=country_list[2],color = color[2])
#ax.bar(ind + width*3, country3n, width, bottom=0, label=country_list[3],color = color[3])
#ax.bar(ind + width*4, country4n, width, bottom=0, label=country_list[4],color = color[4])

##add additional legends here##

#women_std = (30*cm, 25*cm, 20*cm, 31*cm, 22*cm)



ax.set_title('Top Factors Correlated with Unhappiness')
ax.set_xticks(ind + width/2) #The location of G1, G2 label
ax.set_xticklabels((variable_list[0], variable_list[1] ))

ax.legend(loc='upper right')
#ax.yaxis.set_units(inch)
plt.tight_layout()
plt.xlabel("Variables", labelpad=1)
plt.ylabel("Normalized Scores", labelpad=1)
plt.savefig(f"../../Images/Happiness Outside Negative Factor - CANUSA.png")
plt.show()

<IPython.core.display.Javascript object>

### Positive Factors, Canada vs USA

In [49]:
#Preparing the data


variable_list= ["Rev tourism","Average age" ]

country0n=[]
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[0] ].item() )
country0n.append(df_normal[df_normal["Country name"]==country_list[0]][variable_list[1]].item() )


country1n = []
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[0]].item() )
country1n.append(df_normal[df_normal["Country name"]==country_list[1]][variable_list[1]].item() )


#country4n =[]
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[0]].item() )
#country4n.append(df_normal[df_normal["Country name"]==country_list[4]][variable_list[1]].item() )



country3n



`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version


`item` has been deprecated and will be removed in a future version



[0.0032799529277058623, 0.09404388714733536]

In [50]:

N = len(country0n) #number of variables to be tested



fig, ax = plt.subplots(figsize = (6,6))

ind = np.arange(N)    # the x locations for the groups
width = 0.14        # the width of the bars. This is pretty much the place to control spacing
ax.bar(ind + width*0, country0n, width, bottom=0,  label=country_list[0],color = color[0])
ax.bar(ind + width*1, country1n, width, bottom=0, label=country_list[1],color = color[1])
#ax.bar(ind + width*2, country2n, width, bottom=0, label=country_list[2],color = color[2])
#ax.bar(ind + width*3, country3n, width, bottom=0, label=country_list[3],color = color[3])
#ax.bar(ind + width*4, country4n, width, bottom=0, label=country_list[4],color = color[4])

##add additional legends here##

#women_std = (30*cm, 25*cm, 20*cm, 31*cm, 22*cm)



ax.set_title('Top Factors Correlated with Happiness')
ax.set_xticks(ind + width /2) #The location of G1, G2 label
ax.set_xticklabels((variable_list[0], variable_list[1] ))

ax.legend(loc='upper right')
#ax.yaxis.set_units(inch)
plt.tight_layout()
plt.xlabel("Variables",labelpad=1)
plt.ylabel("Normalized Scores", labelpad=1)
plt.savefig(f"../../Images/Happiness Outside Positive Factor - CANUSA.png")
plt.show()



<IPython.core.display.Javascript object>