In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress


In [None]:
# Load the cleaned dataset. The path is relative, so it resolves against the
# notebook's current working directory.
file_path = 'cleaned_data.csv'
happiness_df = pd.read_csv(file_path)

# Preview the first rows as a quick sanity check of the load.
happiness_df.head()

In [None]:
# Pairwise correlation of the numeric columns, rendered as a heat-mapped table.
#
# Non-numeric columns are dropped explicitly before calling .corr(): since
# pandas 2.0 the default is numeric_only=False, so calling .corr() on a frame
# that still contains a string column raises a ValueError instead of silently
# skipping it. select_dtypes() behaves the same on older and newer pandas; if
# every column is already numeric it is a no-op.
corr = (
    happiness_df
    .select_dtypes(include='number')
    .corr()
    .style.background_gradient(cmap='coolwarm')
)
corr


# Freedom to Make Life Choices

Freedom to make life choices is the national average of a binary response (0=no, 1=yes) to the question "Are you satisfied or dissatisfied with your freedom to choose what you do with your life?"

Would it be more appropriate to name this "satisfaction with freedom to make life choices"?

In [None]:
# Distribution of the freedom-to-make-life-choices values over all rows,
# drawn on an explicitly created Axes.
fig, ax = plt.subplots()
happiness_df['freedom to make life choices'].hist(bins=25, ax=ax)

ax.set_xlabel('Freedom to Make Life Choices')
ax.set_ylabel('Count')
ax.set_title('Histogram of Freedom To Make Life Choices Results')

plt.show()

The distribution is roughly bell-shaped but left-skewed, with a longer tail toward the lower values.

In [None]:
# Least-squares fit of happiness score (y) against the freedom score (x),
# drawn as a red line over a scatter of the raw points.
x_axis = happiness_df['freedom to make life choices']
y_axis = happiness_df['Happiness score']

# The linregress result unpacks into these five values.
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept

# Text shown on the chart: the fitted equation and the r squared value.
line_eq = f'y = {round(slope, 2)}x + {round(intercept, 2)}'
rsquared = f'r squared = {round(rvalue * rvalue, 2)}'

fig, ax = plt.subplots()

# Same call order as before: fitted line, annotations, then the scatter.
ax.plot(x_axis, regress_values, color='red')

# Annotation positions are given in data coordinates (x, y).
annotation_style = dict(fontsize=15, color="red")
ax.annotate(line_eq, (0.03, 6.9), **annotation_style)
ax.annotate(rsquared, (0.03, 6.4), **annotation_style)

ax.set(
    title='Freedom to Make Life Choices vs Happiness',
    ylabel='Happiness',
    xlabel='Freedom to Make Life Choices',
)

ax.scatter(x_axis, y_axis)
plt.show()

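The linear relationship between Freedom to Make Life Choices and Happiness is weak (r squared = 0.39): freedom to make life choices explains only about 39% of the variance in happiness scores. This indicates that our regression has poor predictive power. 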

To illustrate this point, let's examine the subset of this data where freedom to make life choices is greater than 0.60 but less than 0.65.

In [None]:
# Keep only the rows whose freedom score lies strictly between 0.60 and 0.65,
# to show how widely happiness varies at nearly the same freedom level.
freedom_scores = happiness_df['freedom to make life choices']
freedom_subset = happiness_df.loc[(freedom_scores > 0.60) & (freedom_scores < 0.65)]

x_axis = freedom_subset['freedom to make life choices']
y_axis = freedom_subset['Happiness score']

# Deliberately NOT named `min` / `max`: assigning to those names at notebook
# scope replaces the built-in functions for the rest of the kernel session,
# so any later min(...) / max(...) call would fail with
# "'str' object is not callable".
# NOTE: the country names are hard-coded, not looked up from the data; they
# must be updated by hand if the dataset or the 0.60-0.65 window changes.
min_label = f'Rwanda: {y_axis.min()}'
max_label = f'Austria: {y_axis.max()}'

fig, ax = plt.subplots()

# Annotation positions are in data coordinates, hand-placed for this subset.
ax.annotate(min_label, (0.623, 3.2), fontsize=12, color="red")
ax.annotate(max_label, (0.607, 7), fontsize=12, color="red")

ax.set_title('Freedom to Make Life Choices vs Happiness')
ax.set_ylabel('Happiness')
ax.set_xlabel('Freedom to Make Life Choices')

ax.scatter(x_axis, y_axis)
plt.show()

Out of the 16 countries in this subset, the happiness scores ranged from 3.268 (Rwanda) to 7.163 (Austria).

Would you choose where to live based solely on freedom to make life choices?