In [1]:
# Dependencies and Setup
import pandas as pd

# Show path to csv files (one file per survey year)
data_2015_path = "Resources/2015.csv"
data_2016_path = "Resources/2016.csv"
data_2017_path = "Resources/2017.csv"
data_2018_path = "Resources/2018.csv"
data_2019_path = "Resources/2019.csv"


def _read_year(csv_path, year_suffix):
    """Load one yearly CSV and append ``year_suffix`` to its column names.

    The 'Country' and 'Region' columns keep their plain names because the
    yearly tables are merged on them further down.
    """
    yearly = pd.read_csv(csv_path)
    new_names = {}
    for column in yearly.columns:
        if column in ('Country', 'Region'):
            continue  # merge keys are left untouched
        new_names[column] = column + year_suffix
    return yearly.rename(columns=new_names)


# Read csv files through Pandas and add the year suffix to their columns
data_2015 = _read_year(data_2015_path, " 2015")
data_2016 = _read_year(data_2016_path, " 2016")
data_2017 = _read_year(data_2017_path, " 2017")
data_2018 = _read_year(data_2018_path, " 2018")
data_2019 = _read_year(data_2019_path, " 2019")

# Outer-merge the yearly tables one after another on Country and Region
complete_data = data_2015
for later_year in (data_2016, data_2017, data_2018, data_2019):
    complete_data = pd.merge(complete_data, later_year, on=["Country", "Region"], how="outer")

# Drop every row that has a missing value in any column; after the outer
# merges that includes rows without a match in one of the yearly tables.
complete_data = complete_data.dropna()
complete_data

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Economy (GDP per Capita) 2015,Family 2015,Health (Life Expectancy) 2015,Freedom 2015,Trust (Government Corruption) 2015,Generosity 2015,...,Trust (Government Corruption) 2018,Generosity 2018,Happiness Rank 2019,Happiness Score 2019,Economy (GDP per Capita) 2019,Family 2019,Health (Life Expectancy) 2019,Freedom 2019,Trust (Government Corruption) 2019,Generosity 2019
0,Switzerland,Western Europe,1.0,7.587,1.397,1.350,0.941,0.666,0.420,0.297,...,0.357,0.256,6.0,7.480,1.452,1.526,1.052,0.572,0.343,0.263
1,Iceland,Western Europe,2.0,7.561,1.302,1.402,0.948,0.629,0.141,0.436,...,0.138,0.353,4.0,7.494,1.380,1.624,1.026,0.591,0.118,0.354
2,Denmark,Western Europe,3.0,7.527,1.325,1.361,0.875,0.649,0.484,0.341,...,0.408,0.284,2.0,7.600,1.383,1.573,0.996,0.592,0.410,0.252
3,Norway,Western Europe,4.0,7.522,1.459,1.331,0.885,0.670,0.365,0.347,...,0.340,0.286,3.0,7.554,1.488,1.582,1.028,0.603,0.341,0.271
4,Canada,North America,5.0,7.427,1.326,1.323,0.906,0.633,0.330,0.458,...,0.291,0.321,9.0,7.278,1.365,1.505,1.039,0.584,0.308,0.285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Rwanda,Sub-Saharan Africa,154.0,3.465,0.222,0.774,0.429,0.592,0.552,0.226,...,0.444,0.200,152.0,3.334,0.359,0.711,0.614,0.555,0.411,0.217
154,Benin,Sub-Saharan Africa,155.0,3.340,0.287,0.354,0.319,0.485,0.080,0.183,...,0.067,0.163,102.0,4.883,0.393,0.437,0.397,0.349,0.082,0.175
155,Syria,Middle East and Northern Africa,156.0,3.006,0.663,0.475,0.722,0.157,0.189,0.472,...,0.144,0.376,149.0,3.462,0.619,0.378,0.440,0.013,0.141,0.331
156,Burundi,Sub-Saharan Africa,157.0,2.905,0.015,0.416,0.224,0.119,0.101,0.197,...,0.076,0.149,145.0,3.775,0.046,0.447,0.380,0.220,0.180,0.176


In [2]:
def _factor_summary(yearly_data, year):
    """Return the mean and max of the factor columns of one yearly table.

    The summary covers all rows and every column from
    "Economy (GDP per Capita) <year>" through the last column.
    """
    first_factor = "Economy (GDP per Capita) " + str(year)
    return yearly_data.loc[:, first_factor:].agg(["mean", "max"])


# Data summary of mean and maximum, one table per year
data_2015_summary = _factor_summary(data_2015, 2015)
data_2016_summary = _factor_summary(data_2016, 2016)
data_2017_summary = _factor_summary(data_2017, 2017)
data_2018_summary = _factor_summary(data_2018, 2018)

# 2019 previously selected rows with `:"Iceland"`. The yearly tables are read
# without an index column, so their row index is the default integer one and a
# string label cannot be used to slice it. All rows are summarised instead,
# like the other years.
# NOTE(review): the recorded output (mean GDP 0.905) already looks like a
# whole-table summary; if an Iceland-only figure is wanted, filter on the
# Country column rather than slicing rows.
data_2019_summary = _factor_summary(data_2019, 2019)

# Only the last expression of a cell is rendered, so just 2019 is shown here.
data_2019_summary

Unnamed: 0,Economy (GDP per Capita) 2019,Family 2019,Health (Life Expectancy) 2019,Freedom 2019,Trust (Government Corruption) 2019,Generosity 2019
mean,0.905147,1.208814,0.725244,0.392571,0.110603,0.184846
max,1.684,1.624,1.141,0.631,0.453,0.566


In [3]:
def _measure_by_year(measure):
    """Select Country, Region and the 2015-2019 columns of one measure
    from complete_data (columns are named "<measure> <year>")."""
    wanted = ["Country", "Region"]
    for year in ("2015", "2016", "2017", "2018", "2019"):
        wanted.append(measure + " " + year)
    return complete_data[wanted]


# One table per measure, each holding that measure for every year
happiness_ranking = _measure_by_year("Happiness Rank")
happiness_score = _measure_by_year("Happiness Score")
economy_gdp = _measure_by_year("Economy (GDP per Capita)")
family = _measure_by_year("Family")
life_expectancy = _measure_by_year("Health (Life Expectancy)")
freedom = _measure_by_year("Freedom")
government_trust = _measure_by_year("Trust (Government Corruption)")
generosity = _measure_by_year("Generosity")

# Only the last expression of the cell is rendered: the generosity table
generosity

Unnamed: 0,Country,Region,Generosity 2015,Generosity 2016,Generosity 2017,Generosity 2018,Generosity 2019
0,Switzerland,Western Europe,0.297,0.281,0.291,0.256,0.263
1,Iceland,Western Europe,0.436,0.477,0.476,0.353,0.354
2,Denmark,Western Europe,0.341,0.362,0.355,0.284,0.252
3,Norway,Western Europe,0.347,0.379,0.362,0.286,0.271
4,Canada,North America,0.458,0.448,0.436,0.321,0.285
...,...,...,...,...,...,...,...
153,Rwanda,Sub-Saharan Africa,0.226,0.236,0.253,0.200,0.217
154,Benin,Sub-Saharan Africa,0.183,0.202,0.208,0.163,0.175
155,Syria,Middle East and Northern Africa,0.472,0.484,0.494,0.376,0.331
156,Burundi,Sub-Saharan Africa,0.197,0.203,0.204,0.149,0.176


In [None]:
# Add each country's mean generosity over the yearly "Generosity <year>" columns.
# (Presumably the intent of "Averages" is one value per row - confirm.)
#
# The previous `generosity.mean()` reduced down the columns, so its result was
# labelled by column names; assigning it as a column aligns on the row index
# and leaves every row NaN. It also included the text columns Country/Region.
# Selecting the year columns by prefix keeps the cell safe to re-run (the
# "Averages" column itself is never averaged), and `assign` returns a new
# frame instead of writing into a selection taken from complete_data.
generosity_year_columns = [col for col in generosity.columns if col.startswith("Generosity ")]
generosity = generosity.assign(Averages=generosity[generosity_year_columns].mean(axis=1))

In [4]:
# Region summary: average of each numeric characteristic per region.
# region_summary is the GroupBy object itself; the averaged table is only displayed.
region_summary = complete_data.groupby("Region")

# complete_data still carries the text column Country. numeric_only=True
# averages only the numeric columns, which is what the recorded output shows;
# without it, pandas 2.x raises on the string column instead of dropping it.
region_summary.mean(numeric_only=True)

Unnamed: 0_level_0,Happiness Rank 2015,Happiness Score 2015,Economy (GDP per Capita) 2015,Family 2015,Health (Life Expectancy) 2015,Freedom 2015,Trust (Government Corruption) 2015,Generosity 2015,Happiness Rank 2016,Happiness Score 2016,...,Trust (Government Corruption) 2018,Generosity 2018,Happiness Rank 2019,Happiness Score 2019,Economy (GDP per Capita) 2019,Family 2019,Health (Life Expectancy) 2019,Freedom 2019,Trust (Government Corruption) 2019,Generosity 2019
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Australia and New Zealand,9.5,7.285,1.292,1.3145,0.92,0.645,0.3925,0.4555,8.5,7.3235,...,0.3455,0.363,9.5,7.2675,1.3375,1.5525,1.031,0.571,0.335,0.331
Central and Eastern Europe,78.5,5.344571,0.94325,1.054821,0.718107,0.359071,0.087821,0.149643,77.857143,5.379607,...,0.062643,0.131571,70.5,5.571786,1.022429,1.340393,0.8085,0.357964,0.06275,0.141214
Eastern Asia,69.25,5.49625,1.0585,1.11575,0.844,0.44525,0.07875,0.17675,73.5,5.477,...,0.084,0.134,72.0,5.56425,1.15125,1.3235,0.921,0.3605,0.0835,0.13425
Latin America and Caribbean,47.55,6.1373,0.8542,1.10745,0.71305,0.49415,0.12155,0.2151,50.5,6.0505,...,0.0793,0.13785,52.0,5.94255,0.90895,1.33285,0.8168,0.4385,0.07485,0.1426
Middle East and Northern Africa,83.888889,5.243556,1.030611,0.900167,0.696667,0.331167,0.162389,0.185,80.888889,5.320111,...,0.122944,0.159556,88.444444,5.148778,1.034389,1.139722,0.746944,0.302389,0.100722,0.147444
North America,10.0,7.273,1.3605,1.285,0.884,0.5895,0.2445,0.4295,9.5,7.254,...,0.212,0.306,14.0,7.085,1.399,1.481,0.9565,0.519,0.218,0.2825
Southeastern Asia,79.0,5.372625,0.81375,0.965625,0.693375,0.552125,0.13975,0.419,77.25,5.39675,...,0.12375,0.321375,82.625,5.333375,0.95075,1.2735,0.769625,0.543375,0.118375,0.30625
Southern Asia,113.142857,4.580857,0.560429,0.645286,0.540857,0.373286,0.102571,0.341429,111.714286,4.563286,...,0.097857,0.243571,115.857143,4.526857,0.650286,0.986857,0.617,0.386286,0.093429,0.234714
Sub-Saharan Africa,131.096774,4.120419,0.381387,0.806935,0.299484,0.381484,0.11171,0.221452,131.806452,4.074839,...,0.085097,0.177806,122.322581,4.380323,0.466226,0.938097,0.440839,0.342226,0.087645,0.18829
Western Europe,27.7,6.73935,1.30305,1.2561,0.9084,0.55305,0.23585,0.3041,27.55,6.7314,...,0.21415,0.22385,21.35,6.8984,1.36205,1.4839,1.01385,0.48255,0.2212,0.22105


In [5]:
# Preview the first five rows of the per-year happiness ranking table.
happiness_ranking.head(n=5)


Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Rank 2016,Happiness Rank 2017,Happiness Rank 2018,Happiness Rank 2019
0,Switzerland,Western Europe,1.0,2.0,4.0,5.0,6.0
1,Iceland,Western Europe,2.0,3.0,3.0,4.0,4.0
2,Denmark,Western Europe,3.0,1.0,2.0,3.0,2.0
3,Norway,Western Europe,4.0,4.0,1.0,2.0,3.0
4,Canada,North America,5.0,6.0,7.0,7.0,9.0


In [64]:
# Preview the first five rows of the per-year GDP-per-capita table.
economy_gdp.head(n=5)

Unnamed: 0,Country,Region,Economy (GDP per Capita) 2015,Economy (GDP per Capita) 2016,Economy (GDP per Capita) 2017,Economy (GDP per Capita) 2018,Economy (GDP per Capita) 2019
0,Switzerland,Western Europe,1.397,1.527,1.565,1.42,1.452
1,Iceland,Western Europe,1.302,1.427,1.481,1.343,1.38
2,Denmark,Western Europe,1.325,1.442,1.482,1.351,1.383
3,Norway,Western Europe,1.459,1.577,1.616,1.456,1.488
4,Canada,North America,1.326,1.44,1.479,1.33,1.365


In [58]:
# Merge happiness rank and GDP data into one table, matching rows on the
# Country/Region pair and keeping rows that appear on either side.
# Region_compare stays a plain DataFrame here: later cells call .groupby on it.
join_keys = ["Country", "Region"]
Region_compare = pd.merge(
    left=happiness_ranking,
    right=economy_gdp,
    how="outer",
    on=join_keys,
)
Region_compare.head(n=5)

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Rank 2016,Happiness Rank 2017,Happiness Rank 2018,Happiness Rank 2019,Economy (GDP per Capita) 2015,Economy (GDP per Capita) 2016,Economy (GDP per Capita) 2017,Economy (GDP per Capita) 2018,Economy (GDP per Capita) 2019
0,Switzerland,Western Europe,1.0,2.0,4.0,5.0,6.0,1.397,1.527,1.565,1.42,1.452
1,Iceland,Western Europe,2.0,3.0,3.0,4.0,4.0,1.302,1.427,1.481,1.343,1.38
2,Denmark,Western Europe,3.0,1.0,2.0,3.0,2.0,1.325,1.442,1.482,1.351,1.383
3,Norway,Western Europe,4.0,4.0,1.0,2.0,3.0,1.459,1.577,1.616,1.456,1.488
4,Canada,North America,5.0,6.0,7.0,7.0,9.0,1.326,1.44,1.479,1.33,1.365


In [69]:
# Group the combined rank/GDP table by region and show the first five rows of
# every group. head() on a GroupBy does not aggregate: it returns the selected
# rows themselves, in their original order.
region_df = Region_compare.groupby(by="Region")
region_df.head(n=5)

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Rank 2016,Happiness Rank 2017,Happiness Rank 2018,Happiness Rank 2019,Economy (GDP per Capita) 2015,Economy (GDP per Capita) 2016,Economy (GDP per Capita) 2017,Economy (GDP per Capita) 2018,Economy (GDP per Capita) 2019
0,Switzerland,Western Europe,1.0,2.0,4.0,5.0,6.0,1.397,1.527,1.565,1.42,1.452
1,Iceland,Western Europe,2.0,3.0,3.0,4.0,4.0,1.302,1.427,1.481,1.343,1.38
2,Denmark,Western Europe,3.0,1.0,2.0,3.0,2.0,1.325,1.442,1.482,1.351,1.383
3,Norway,Western Europe,4.0,4.0,1.0,2.0,3.0,1.459,1.577,1.616,1.456,1.488
4,Canada,North America,5.0,6.0,7.0,7.0,9.0,1.326,1.44,1.479,1.33,1.365
5,Finland,Western Europe,6.0,5.0,5.0,1.0,1.0,1.29,1.406,1.444,1.305,1.34
8,New Zealand,Australia and New Zealand,9.0,8.0,8.0,8.0,8.0,1.25,1.361,1.406,1.268,1.303
9,Australia,Australia and New Zealand,10.0,9.0,10.0,10.0,11.0,1.334,1.444,1.484,1.34,1.372
10,Israel,Middle East and Northern Africa,11.0,11.0,11.0,19.0,13.0,1.229,1.338,1.375,1.301,1.276
11,Costa Rica,Latin America and Caribbean,12.0,14.0,12.0,13.0,12.0,0.956,1.069,1.11,1.01,1.034


In [63]:
# Number of rows (countries) per region, most frequent region first.
region_labels = Region_compare["Region"]
region_labels.value_counts()

Sub-Saharan Africa                 31
Central and Eastern Europe         28
Latin America and Caribbean        20
Western Europe                     20
Middle East and Northern Africa    18
Southeastern Asia                   8
Southern Asia                       7
Eastern Asia                        4
Australia and New Zealand           2
North America                       2
Name: Region, dtype: int64

In [54]:
# For every region group, collect the distinct "Region" values found in it.
# NOTE(review): the AttributeError recorded under this cell means Region_compare
# was a DataFrameGroupBy when the cell last ran - presumably stale kernel state
# from an out-of-order run. Restart the kernel and run all cells to confirm.
# NOTE(review): this rebinds region_df, which the earlier cell set to a GroupBy
# object, to the Series produced here.
region_df = (
    Region_compare
    .groupby(by="Region")["Region"]
    .unique()
)

AttributeError: 'DataFrameGroupBy' object has no attribute 'groupby'

In [34]:
# Sort results in ascending order (NOTE: the draft call below passes ascending=False, i.e. descending)
#region_df = Region_compare.sort_values([""], ascending=False).round(2)

In [37]:
# It is possible to sort based upon multiple columns
#freedom_family_df = data_2015.sort_values(
    #["Family 2015", "Freedom 2015"], ascending=False)
#freedom_family_df.head()

In [36]:
# The index can be reset to provide index numbers based on the new rankings.
#new_index_df = freedom_family_df.reset_index(drop=True)
#new_index_df.head()