* **Step 1: A look into collective happiness for the Top 50 Happiest Countries in the World, by Region**

In [1]:
## Import Dependencies
import matplotlib.pyplot as plt
import sklearn.datasets as dta
from scipy.stats import linregress
import pandas as pd
import scipy.stats as sts
import numpy as np
import random
import math as math
import seaborn as sns
import os

In [2]:
## Locations of the 2017 World Happiness Report (WHR) and the country-to-region lookup table
whr_2017_path = "../World_Happiness_Reports/2017.csv"
region_data_path = "Region_Data.csv"

## Load both CSV files; the WHR frame is what the top-50 selection is taken from
whr_2017_df, region_data_df = (
    pd.read_csv(csv_path) for csv_path in (whr_2017_path, region_data_path)
)

## Preview the first rows of the happiness report
whr_2017_df.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [4]:
## Order the report by ascending happiness rank (rank 1 first) and keep the first 50 rows
top_50_sorted_df = (
    whr_2017_df
    .sort_values(by="Happiness.Rank")
    .iloc[:50]
)

## Optional export of the selection (left disabled)
#top_50_sorted_df.to_csv('Top_50_Carolina.csv')

## Optional preview of the selection (left disabled)
#top_50_sorted_df.head()

In [5]:
## Attach the region metadata to each top-50 country by matching the WHR "Country"
## column against the region table's "name" column.
## NOTE(review): this is pandas' default inner join, so a country whose spelling differs
## between the two files would be dropped -- confirm the merged frame still has 50 rows.
merged_countries_regions_df = top_50_sorted_df.merge(
    region_data_df,
    left_on="Country",
    right_on="name",
)

## Optional export of the merged table (left disabled)
#merged_countries_regions_df.to_csv('merged_countries_regions.csv')

## Preview the first ten merged rows
merged_countries_regions_df.head(10)

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,...,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,...,NO,NOR,578,ISO 3166-2:NO,Europe,Northern Europe,,150.0,154.0,
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,...,DK,DNK,208,ISO 3166-2:DK,Europe,Northern Europe,,150.0,154.0,
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,...,IS,ISL,352,ISO 3166-2:IS,Europe,Northern Europe,,150.0,154.0,
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,...,CH,CHE,756,ISO 3166-2:CH,Europe,Western Europe,,150.0,155.0,
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,...,FI,FIN,246,ISO 3166-2:FI,Europe,Northern Europe,,150.0,154.0,
5,Netherlands,6,7.377,7.427426,7.326574,1.503945,1.428939,0.810696,0.585384,0.47049,...,NL,NLD,528,ISO 3166-2:NL,Europe,Western Europe,,150.0,155.0,
6,Canada,7,7.316,7.384403,7.247597,1.479204,1.481349,0.834558,0.611101,0.43554,...,CA,CAN,124,ISO 3166-2:CA,Americas,Northern America,,19.0,21.0,
7,New Zealand,8,7.314,7.37951,7.24849,1.405706,1.548195,0.81676,0.614062,0.500005,...,NZ,NZL,554,ISO 3166-2:NZ,Oceania,Australia and New Zealand,,9.0,53.0,
8,Sweden,9,7.284,7.344095,7.223905,1.494387,1.478162,0.830875,0.612924,0.385399,...,SE,SWE,752,ISO 3166-2:SE,Europe,Northern Europe,,150.0,154.0,
9,Australia,10,7.284,7.356651,7.211349,1.484415,1.510042,0.843887,0.601607,0.477699,...,AU,AUS,36,ISO 3166-2:AU,Oceania,Australia and New Zealand,,9.0,53.0,


To group the countries into regions, drop the country names and average the remaining measures within each sub-region:


In [9]:
## Drop the country name column before aggregating
clean_df = merged_countries_regions_df.drop(columns=['Country'])

## Average every numeric column within each sub-region.
## numeric_only=True is needed because the frame still holds text columns
## (ISO codes, region names, ...): pandas >= 2.0 raises a TypeError when asked to
## average those, whereas older versions silently dropped them.
## NOTE(review): ID-like columns (country-code, region-code, ...) are averaged too;
## those means carry no meaning and are kept only so the table layout is unchanged.
grouped_regions_df = clean_df.groupby('sub-region').mean(numeric_only=True)

## Optional export of the aggregated table (left disabled)
#grouped_regions_df.to_csv('grouped_regions_df.csv')

## Label the index "Region" for display (new object instead of mutating the index in place)
grouped_regions_df = grouped_regions_df.rename_axis('Region')

## Preview the first five sub-regions
grouped_regions_df.head(5)

Unnamed: 0_level_0,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual,country-code,region-code,sub-region-code,intermediate-region-code
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Australia and New Zealand,9.0,7.299,7.368081,7.229919,1.44506,1.529119,0.830323,0.607835,0.488852,0.342,2.055834,295.0,9.0,53.0,
Central Asia,47.0,5.971,6.065538,5.876463,0.786441,1.548969,0.498273,0.658249,0.415984,0.246528,1.816914,860.0,142.0,143.0,
Eastern Asia,33.0,6.422,6.494596,6.349404,1.433627,1.384565,0.793984,0.361467,0.25836,0.063829,2.126607,158.0,142.0,30.0,
Eastern Europe,39.5,6.16075,6.236349,6.085152,1.31291,1.463485,0.6785,0.420222,0.108845,0.038339,2.138242,541.25,150.0,151.0,
Latin America and the Caribbean,31.857143,6.390429,6.506846,6.274011,1.079964,1.317385,0.647587,0.488664,0.203399,0.094663,2.558736,338.071429,19.0,419.0,10.714286


* **Step 2: Explore and clean data for "Suicide Rates" and "Family Status" for the Top 50 Happiest Countries in the World**
        

In [10]:
# File locations for the 2017 happiness table and the cleaned suicide-rate table
top_50_path = "../Suicide_Report/2017.csv"
suicide_rate_path = "../Suicide_Report/clean_suicide.csv"

# Load the happiness table and the suicide-rate table
top_50, clean_suicide = (
    pd.read_csv(csv_path) for csv_path in (top_50_path, suicide_rate_path)
)

# Join the two tables on their shared "Country" column into a single dataset
merged_df = top_50.merge(clean_suicide, on="Country")

# Preview the first ten rows of the combined table
merged_df.head(10)

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual,Code,Death_rate
0,Norway,1.0,7.54,7.59,7.48,1.62,1.53,0.8,0.64,0.36,0.32,2.28,NOR,9.27
1,Denmark,2.0,7.52,7.58,7.46,1.48,1.55,0.79,0.63,0.36,0.4,2.31,DNK,8.8
2,Iceland,3.0,7.5,7.62,7.39,1.48,1.61,0.83,0.63,0.48,0.15,2.32,ISL,9.61
3,Switzerland,4.0,7.49,7.56,7.43,1.56,1.52,0.86,0.62,0.29,0.37,2.28,CHE,9.32
4,Finland,5.0,7.47,7.53,7.41,1.44,1.54,0.81,0.62,0.25,0.38,2.43,FIN,12.94
5,Netherlands,6.0,7.38,7.43,7.33,1.5,1.43,0.81,0.59,0.47,0.28,2.29,NLD,9.17
6,Canada,7.0,7.32,7.38,7.25,1.48,1.48,0.83,0.61,0.44,0.29,2.19,CAN,10.91
7,New Zealand,8.0,7.31,7.38,7.25,1.41,1.55,0.82,0.61,0.5,0.38,2.05,NZL,11.3
8,Sweden,9.0,7.28,7.34,7.22,1.49,1.48,0.83,0.61,0.39,0.38,2.1,SWE,11.08
9,Australia,10.0,7.28,7.36,7.21,1.48,1.51,0.84,0.6,0.48,0.3,2.07,AUS,11.05


In [11]:
# Clean-up of the merged happiness / suicide data for the specific analysis

# Map each source column to its shorter analysis name ("Country", "Family", "Freedom"
# and "Generosity" keep their names). Renaming by name, rather than assigning a
# positional list to .columns, cannot mislabel a column if the CSV column order
# changes, and re-running the cell is harmless.
analysis_column_names = {
    'Happiness.Rank': 'Rank',
    'Happiness.Score': 'Happy_Score',
    'Whisker.high': 'Top_Whisker',
    'Whisker.low': 'Low_Whisker',
    'Economy..GDP.per.Capita.': 'GDP',
    'Health..Life.Expectancy.': 'Life',
    'Trust..Government.Corruption.': 'Trust_Gov',
    'Dystopia.Residual': 'Dystopia',
    'Code': 'Country_Code',
    'Death_rate': 'Suicide_Rate',
}
merged_df = merged_df.rename(columns=analysis_column_names)

# Keep only the columns used in the analysis. .copy() yields an independent frame so
# later column assignments on clean_df do not raise SettingWithCopyWarning.
clean_df = merged_df[['Rank', 'Country_Code', 'Country', 'Happy_Score', 'Family', 'Suicide_Rate']].copy()

# Optional display formatting of the rank (left disabled)
#clean_df["Rank"] = clean_df["Rank"].map("#{:.0f}".format)

# Preview the first ten rows of the analysis table
clean_df.head(10)

Unnamed: 0,Rank,Country_Code,Country,Happy_Score,Family,Suicide_Rate
0,1.0,NOR,Norway,7.54,1.53,9.27
1,2.0,DNK,Denmark,7.52,1.55,8.8
2,3.0,ISL,Iceland,7.5,1.61,9.61
3,4.0,CHE,Switzerland,7.49,1.52,9.32
4,5.0,FIN,Finland,7.47,1.54,12.94
5,6.0,NLD,Netherlands,7.38,1.43,9.17
6,7.0,CAN,Canada,7.32,1.48,10.91
7,8.0,NZL,New Zealand,7.31,1.55,11.3
8,9.0,SWE,Sweden,7.28,1.48,11.08
9,10.0,AUS,Australia,7.28,1.51,11.05


* **Step 3: Explore and clean data for "Life Expectancy" and "Population Size"**
    

In [None]:
# Get data from the Happiness Report for the 50 happiest and the 10 least happy countries, plus population and region data for these countries

In [None]:
# Merge the happiness report, population and region data for the 50 happiest and the 10 least happy countries into one data set:

In [None]:
# Build one table holding the first 50 rows and rows 145-154 of the 2017 report
# (the happiest 50 and least happy 10, assuming the CSV is ordered by happiness
# rank -- TODO confirm), each annotated with its region and population.

# Row positions of the combined frame that keep the placeholder values.
# NOTE(review): presumably these are countries whose names have no match in the
# region / population tables -- confirm, and ideally fix the names instead.
ROWS_LEFT_UNFILLED = {32, 39, 48, 49, 50, 51, 56}

dataset2017 = pd.read_csv('2017.csv')
datasetppl = pd.read_csv('POPULATION.csv')
datasetregion = pd.read_csv('region.csv')

# Both slices come from the single read above, so the file is read only once.
datasetleast = dataset2017.iloc[145:155, :]
dataset2017 = pd.concat([dataset2017.iloc[0:50, :], datasetleast]).reset_index(drop=True)

# Placeholder values, overwritten below for every row that is not skipped.
# The population column is created with object dtype so that integer populations
# can be stored next to the placeholder string.
dataset2017['region'] = "region"
dataset2017['population'] = pd.Series("population", index=dataset2017.index, dtype=object)

for i in dataset2017.index:
    if i in ROWS_LEFT_UNFILLED:
        continue

    country = dataset2017.at[i, 'Country']

    # Rows of each lookup table that match this country; taking .iloc[0] below
    # raises IndexError if a non-skipped country is missing from a table.
    region_matches = datasetregion.loc[datasetregion['Country'] == country, 'Region']
    population_matches = datasetppl.loc[datasetppl['Country Name'] == country, 'POPULATION']

    # .at writes directly into dataset2017. A chained dataset2017['region'][i] = ...
    # assignment may write to a temporary copy and leave the frame unchanged.
    dataset2017.at[i, 'region'] = region_matches.iloc[0]
    # The population must come from the population table's own match; indexing it
    # with the region table's row label pairs countries with the wrong population
    # whenever the two tables are ordered differently.
    dataset2017.at[i, 'population'] = population_matches.iloc[0]

In [12]:
# Load the prepared dataset that combines the top 50 happiest and the 10 least happy
# countries with their population and region
dataset2017 = pd.read_csv(filepath_or_buffer='FINAL.csv')

# Preview the first five rows
dataset2017.head(n=5)

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual,region,Population
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027,Western Europe,5276968
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707,Western Europe,5764980
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715,Western Europe,343400
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716,Western Europe,8451840
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182,Western Europe,5508214


In [None]:
# Finally, we used the same dataset to analyze government trust and freedom. 