In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

In [None]:
# Read the raw World Happiness Report table from disk and preview the first rows.
file = "DataForTable2.1.csv"
happiness_years = pd.read_csv(filepath_or_buffer=file)
happiness_years.head(n=5)

In [None]:
# Summary statistics for the raw table, used as a quick sanity check on value ranges.
happiness_years.describe()

In [None]:
# Show every row whose Generosity value is exactly zero.
happiness_years[happiness_years["Generosity"].eq(0)]

In [None]:
# Keep only the most recent survey year and the columns used in the analysis.
latest_year = happiness_years['year'].max()
is_latest_year = happiness_years['year'] == latest_year
kept_columns = [
    "Country name",
    "Life Ladder",
    "Log GDP per capita",
    "Social support",
    "Healthy life expectancy at birth",
    "Freedom to make life choices",
]
happiness_2021 = happiness_years.loc[is_latest_year, kept_columns]

# Number of distinct countries present in the latest year.
happiness_2021["Country name"].nunique()

In [None]:
# Missing-value policy: drop every row that has a NaN in any of the kept columns.
#
# Alternative that was considered and left disabled: replacing NaN with 0 on the full
# table (happiness_years.fillna(0)), on the reasoning that none of the columns has a
# legitimate value of 0.
happiness_2021 = happiness_2021.dropna(axis="index", how="any")
happiness_2021.head()

In [None]:
# Shorter column labels for the cleaned table; later cells must use these new names.
column_labels = {
    'Log GDP per capita': 'GDP per capita',
    'Life Ladder': 'Happiness score',
    'Healthy life expectancy at birth': 'healthy life expectancy',
    'Country name': 'Country',
}
happiness_2021 = happiness_2021.rename(columns=column_labels)
happiness_2021.head()

In [None]:
# Correlation table for reference.
# Correlate only the numeric columns: the frame still carries the text "Country"
# column, which DataFrame.corr() refuses to process on pandas >= 2.0 (where
# numeric_only defaults to False). select_dtypes works on old and new pandas alike.
corr = happiness_2021.select_dtypes(include="number").corr()
# Colour each cell with the purple-green map so strong correlations are easy to spot.
corr = corr.style.background_gradient(cmap='PRGn')
corr

In [None]:
# Export the cleaned latest-year table to CSV.
# NOTE: to_csv is called with its default index setting, so the DataFrame index is
# written as an unnamed first column of the file.
happiness_2021.to_csv('cleaned_happiness_2021.csv')

In [None]:
# Start of Amanda's code

### Social Support
Per the World Happiness Report: 
>Social support (or having someone to count on in times of trouble) is the national average of the binary responses (either 0 or 1) to the GWP question “If you were in trouble, do you have relatives or friends you can count on to help you whenever you need them, or not?”

### "Outliers"
Two nations stand out as having outlier social support values (see table below). They are also easy to locate on the regression chart. Benin in particular stands out for having very low social support but a noticeably higher happiness score than Afghanistan.

In [None]:
# Predictor (x) and response (y) for the social-support analysis.
x_values = happiness_2021['Social support']
# The cleaning step renamed 'Life Ladder' to 'Happiness score'; the old label no
# longer exists on happiness_2021 and would raise a KeyError on a fresh run.
y_values = happiness_2021['Happiness score']

In [None]:
# Tukey fences for social support: values beyond 1.5 * IQR from the quartiles
# are treated as outliers.
quartiles = x_values.quantile([0.25, 0.5, 0.75])
lowerq, upperq = quartiles.loc[0.25], quartiles.loc[0.75]
iqr = upperq - lowerq
fence_width = 1.5 * iqr
lower_bound = lowerq - fence_width
upper_bound = upperq + fence_width
print(f"Q1: {lowerq}\nQ3: {upperq}\nIQR: {iqr}\nLower Bound: {lower_bound}\nUpper Bound: {upper_bound}")

In [None]:
# Countries whose social support falls below the lower fence, shown lowest first.
below_lower_fence = x_values < lower_bound
outliers = happiness_2021[below_lower_fence]
outliers.sort_values(by='Social support')

In [None]:
# Box plot of social support with the low-side outlier countries labelled in red.
plt.boxplot(x_values, showmeans=True)
plt.title("Social Support Values")
# The cleaning step renamed 'Country name' to 'Country', so labels are read from the
# renamed column. Each name is paired with its scalar social-support value so that
# annotate receives plain (x, y) numbers instead of a one-element Series.
for country, support in zip(outliers['Country'], outliers['Social support']):
    # x = 1.05 places the label just to the right of the single box drawn at x = 1.
    plt.annotate(country, (1.05, support), fontsize=10, color="red")
plt.show()

In [None]:
# Ordinary least-squares fit of happiness on social support.
fit = st.linregress(x_values, y_values)
slope, intercept, rvalue, pvalue, stderr = fit
# Fitted happiness value for every observed social-support value.
regress_values = intercept + slope * x_values
# Human-readable equation of the fitted line, rounded to two decimals.
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

In [None]:
# Scatter of all countries with the fitted regression line and its equation.
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")
ax.annotate(line_eq, (0.7, 2.4), fontsize=15, color="red")
ax.set_xlabel("Social Support")
ax.set_ylabel("Happiness Score")
ax.set_title("Social Support\nvs\nTotal Happiness Score")
plt.show()

In [None]:
# Pearson correlation coefficient between social support and happiness (p-value unused).
coefficient, _ = st.pearsonr(x_values, y_values)
pr = round(coefficient, 2)
print(f'The correlation between social support and happiness is {pr}, suggesting a strong link between the two factors.')

In [None]:
# The ten countries with the lowest social support.
bottom_10 = happiness_2021.sort_values('Social support').head(10)
# Display them ordered by happiness, then social support. The cleaning step renamed
# 'Life Ladder' to 'Happiness score', so the sort must use the new label; the old
# one raises a KeyError on a fresh run.
bottom_10.sort_values(['Happiness score','Social support'])

In [None]:
# Repeat the regression and correlation on the ten lowest-social-support countries only.
# NOTE: this cell rebinds x_values, y_values and the fit variables, so the earlier
# all-countries cells must be re-run before their results are used again.
x_values = bottom_10['Social support']
# The cleaning step renamed 'Life Ladder' to 'Happiness score'; the old label no
# longer exists on the frame and would raise a KeyError on a fresh run.
y_values = bottom_10['Happiness score']

# Least-squares fit and the fitted line's equation, rounded to two decimals.
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the ten countries with the fitted line and its equation.
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(0.525,2.4),fontsize=15,color="red")
plt.xlabel("Social Support")
plt.ylabel("Happiness Score")
plt.title("Social Support\nvs\nTotal Happiness Score")
plt.show()

# Pearson correlation coefficient for the subset.
pr = round(st.pearsonr(x_values,y_values)[0],2)
print(f'The correlation between social support and happiness for bottom 10 countries is {pr}')

In [None]:
# End of Amanda's code

In [None]:
# Start John's code

In [None]:
# End John's code

In [None]:
# Start Nathan's code

In [None]:
# End Nathan's code

In [None]:
# Start Joey's code

In [None]:
# End Joey's code