In [None]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

# Python Api Dependencies
import requests
import json
from pprint import pprint
import hvplot.pandas
from scipy.stats import linregress
# conda install -c conda-forge geopandas
import geopandas as gpd
# pip install pycountry
import pycountry
from mplcursors import cursor  # separate package must be installed

# Import the API key
from api_keys import geoapify_key

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the World Happiness Report 2023 CSV and preview the first rows
whr_csv_path = "../Resources/WHR_2023.csv"
whr_df = pd.read_csv(whr_csv_path)
whr_df.head()

In [None]:
# Rename the columns to short, presentation-friendly labels.
# NOTE(review): this is a positional assignment, so it assumes the CSV has exactly
# these 11 columns in this order - confirm against the file.
# "Negative Affect" (previously misspelled "Negative Effect") pairs with "Positive Affect".
whr_df.columns = ["Country", "Year", "Happiness Score", "GDP p/capita", "Social Support",
                  "Healthy Life Expectancy", "Life Choices Freedom", "Generosity",
                  "Corruption Perceptions", "Positive Affect", "Negative Affect"]
whr_df.head()

In [None]:
# Keep only the survey years this notebook works with (2016 onwards)
recent_years_mask = whr_df["Year"] >= 2016
whr_clean_df = whr_df.loc[recent_years_mask]
whr_clean_df

In [None]:
# Average every numeric indicator per survey year.
# numeric_only=True is needed because "Country" holds strings: pandas >= 2.0 raises a
# TypeError instead of silently dropping text columns when computing the mean.
grouped_clean_df = whr_clean_df.groupby(["Year"]).mean(numeric_only=True)
grouped_clean_df

In [None]:
# Plot the yearly average Happiness Score (2016 onwards)
x_axis = grouped_clean_df.index
y_axis = grouped_clean_df["Happiness Score"]

# Explicit axes plus title/labels so the figure can be read on its own
fig, ax = plt.subplots()
ax.plot(x_axis, y_axis)
ax.set_xticks(x_axis)  # one tick per survey year
ax.set_title("Average Happiness Score by Year")
ax.set_xlabel("Year")
ax.set_ylabel("Happiness Score")
plt.show()

In [None]:
# Plot the yearly average Healthy Life Expectancy (2016 onwards)
x_axis = grouped_clean_df.index
y_axis = grouped_clean_df["Healthy Life Expectancy"]

# Explicit axes plus title/labels so the figure can be read on its own
fig, ax = plt.subplots()
ax.plot(x_axis, y_axis)
ax.set_xticks(x_axis)  # one tick per survey year
ax.set_title("Average Healthy Life Expectancy by Year")
ax.set_xlabel("Year")
ax.set_ylabel("Healthy Life Expectancy")
plt.show()

In [None]:
# Plot the yearly average GDP p/capita value (2016 onwards)
x_axis = grouped_clean_df.index
y_axis = grouped_clean_df["GDP p/capita"]

# Explicit axes plus title/labels so the figure can be read on its own
fig, ax = plt.subplots()
ax.plot(x_axis, y_axis)
ax.set_xticks(x_axis)  # one tick per survey year
ax.set_title("Average GDP p/capita by Year")
ax.set_xlabel("Year")
ax.set_ylabel("GDP p/capita")
plt.show()

In [None]:
# Pre-pandemic subset: rows of the cleaned data with Year up to and including 2019
is_pre_covid = whr_clean_df["Year"] <= 2019
pre_covid_df = whr_clean_df.loc[is_pre_covid]
pre_covid_df

In [None]:
# Pandemic-era subset: rows of the cleaned data with Year 2020 or later
is_post_covid = whr_clean_df["Year"] >= 2020
post_covid_df = whr_clean_df.loc[is_post_covid]
post_covid_df

In [None]:
# Group post_covid_df by year and average the numeric indicators.
# numeric_only=True keeps the string "Country" column out of the mean
# (pandas >= 2.0 raises a TypeError otherwise).
grouped_post_covid = post_covid_df.groupby(["Year"]).mean(numeric_only=True)
grouped_post_covid

In [None]:
# Group pre_covid_df by year and average the numeric indicators.
# numeric_only=True keeps the string "Country" column out of the mean
# (pandas >= 2.0 raises a TypeError otherwise).
grouped_pre_covid = pre_covid_df.groupby(["Year"]).mean(numeric_only=True)
pre_covid = pd.DataFrame(grouped_pre_covid)
pre_covid

In [None]:
# Quick check chart: pre-pandemic yearly average Happiness Score as bars
x_axis = pre_covid.index
y_axis = pre_covid["Happiness Score"]

fig, ax = plt.subplots()
ax.bar(x_axis, y_axis)
ax.set_xticks(x_axis)
plt.show()

In [None]:
# Load the depression-rates-by-country CSV and preview the first rows
depression_csv_path = "../Resources/depression-rates-by-country-2023.csv"
depression_df = pd.read_csv(depression_csv_path)
depression_df.head()

In [None]:
# Replace the depression file's headers with the labels used in the rest of the notebook
# (positional assignment: the frame must have exactly four columns)
depression_column_names = ["Country", "% of Pop with Depression", "Cases", "Pop-2023"]
depression_df.columns = depression_column_names
depression_df

In [None]:
# Python API
# Create a dataframe with each country's latitude/longitude and ISO alpha-3 code
params = {
    "apiKey": geoapify_key
}

# Build URL using the geocode endpoint
base_url = "https://api.geoapify.com/v1/geocode/search"

country_data = []
failed_countries = []

country_list = whr_clean_df["Country"].unique()
print(f"There are {len(country_list)} countries in total.")

# Go through list of countries to get coordinates
for country in country_list:
    params["country"] = country

    # Reset on every iteration so a failed lookup can never inherit the
    # coordinates or code of the previously processed country.
    lat, long, a3code = np.nan, np.nan, None

    try:
        # Run request; the timeout stops one stalled call from hanging the notebook
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        properties = response.json()["features"][0]["properties"]

        lat = properties["lat"]
        long = properties["lon"]

        # Upper-case the alpha-2 code so the lookup does not depend on how the
        # installed pycountry version handles case.
        country_py = pycountry.countries.get(alpha_2=properties["country_code"].upper())
        a3code = country_py.alpha_3.upper()
    except (requests.RequestException, ValueError, LookupError, AttributeError) as error:
        # Missing features/keys, an unknown country code (pycountry returned None)
        # or a network/HTTP problem: keep the row, leave the unresolved fields
        # empty, and report it instead of swallowing the error.
        failed_countries.append(country)
        print(f"Could not fully resolve '{country}': {error!r}")

    country_data.append({"Country": country,
                         "Latitude": lat,
                         "Longitude": long,
                         "Code_A3": a3code})

if failed_countries:
    print(f"{len(failed_countries)} countries have missing coordinates and/or code: {failed_countries}")

country_df = pd.DataFrame(country_data)
country_df

In [None]:
# Show the participating countries on an interactive map, one point per country
map_options = dict(
    geo=True,
    tiles="ESRI",
    frame_width=800,
    frame_height=600,
    scale=0.5,
    color="Country",
)
country_map_plot = country_df.hvplot.points("Longitude", "Latitude", **map_options)

# Display the map
country_map_plot

In [None]:
# Save the cleaned dataframe to an independent working dataframe.
# .copy() makes this a real copy; a plain assignment would only create a second
# name for the same object, so edits to one would show up in the other.
world_happiness_df = whr_clean_df.copy()
world_happiness_df

In [None]:
# Average every indicator per country across the selected years
whr_clean_group_df = whr_clean_df.groupby("Country").mean()
# Attach each country's coordinates and ISO code by joining on the shared Country name
merged_country_happiness_df = whr_clean_group_df.merge(country_df, on=["Country"])
merged_country_happiness_df

In [None]:
# Join the per-country happiness averages (with coordinates) to the depression data
# on the shared Country name.
#
# IMPORTANT NOTE (open question from the original analysis): the depression rates
# are dated 2023, whereas the happiness figures are averages over several years.
#
happiness_depression_df = merged_country_happiness_df.merge(depression_df, on=["Country"])
happiness_depression_df

In [None]:
# Rank countries from highest to lowest Happiness Score and chart the first ten
happiness_depression_df = happiness_depression_df.sort_values(by="Happiness Score", ascending=False)
country_happiness_df = happiness_depression_df.head(10).set_index("Country")
x_axis = country_happiness_df.index
y_axis = country_happiness_df["Happiness Score"]

fig, ax = plt.subplots()
ax.bar(x_axis, y_axis)
ax.set_ylabel("Happiness Score")
# Slanted, right-aligned labels keep the country names from overlapping
plt.xticks(x_axis, rotation=50, ha="right")
plt.show()


In [None]:
# Rank countries from lowest to highest Happiness Score and chart the first ten
happiness_depression_df = happiness_depression_df.sort_values(by="Happiness Score")
country_happiness_df = happiness_depression_df.head(10).set_index("Country")
x_axis = country_happiness_df.index
y_axis = country_happiness_df["Happiness Score"]

fig, ax = plt.subplots()
ax.bar(x_axis, y_axis)
ax.set_ylabel("Happiness Score")
# Slanted, right-aligned labels keep the country names from overlapping
plt.xticks(x_axis, rotation=50, ha="right")
plt.show()

In [None]:
# Display the 10 countries with the highest % of population with depression
# (the slice below takes ten rows, not five)
happiness_depression_df = happiness_depression_df.sort_values("% of Pop with Depression", ascending=False)
country_depression_df = happiness_depression_df.iloc[0:10, :].set_index("Country")
x_axis = country_depression_df.index
y_axis = country_depression_df["% of Pop with Depression"]

fig, ax = plt.subplots()
ax.bar(x_axis, y_axis)
ax.set_title("10 Countries with the Highest % of Population with Depression")
ax.set_ylabel("% of Pop with Depression")
# Slanted, right-aligned labels keep the country names from overlapping
plt.xticks(x_axis, rotation=50, ha="right")
plt.show()


In [None]:
# Display the 10 countries with the lowest % of population with depression
# (the slice below takes ten rows, not five)
happiness_depression_df = happiness_depression_df.sort_values("% of Pop with Depression")
country_depression_df = happiness_depression_df.iloc[0:10, :].set_index("Country")
x_axis = country_depression_df.index
y_axis = country_depression_df["% of Pop with Depression"]

fig, ax = plt.subplots()
ax.bar(x_axis, y_axis)
ax.set_title("10 Countries with the Lowest % of Population with Depression")
ax.set_ylabel("% of Pop with Depression")
# Slanted, right-aligned labels keep the country names from overlapping
plt.xticks(x_axis, rotation=50, ha="right")
plt.show()


# Show plot based on location and the happiness score / % of population with depression (side by side), marked by color intensity

In [None]:

# Read the low-resolution Natural Earth country polygons bundled with geopandas
# NOTE(review): gpd.datasets is deprecated in recent geopandas releases - confirm the
# installed version still provides it, or point read_file at a local copy of the data.
world_maps = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# naturalearth_lowres is known to carry iso_a3 == "-99" for a few countries, France
# and Norway among them, which would silently drop them from the merge below.
# Setting the code explicitly is harmless if the dataset already has it right.
iso_a3_fixes = {"France": "FRA", "Norway": "NOR"}
for map_country_name, iso_code in iso_a3_fixes.items():
    world_maps.loc[world_maps["name"] == map_country_name, "iso_a3"] = iso_code

# Merge your data with the map data
world_happiness_merged = world_maps.merge(happiness_depression_df, left_on='iso_a3', right_on='Code_A3')

# Plot the two maps next to each other
fig, ax = plt.subplots(1, 2, figsize=(18, 8))

world_happiness_merged.plot(column='Happiness Score', cmap='viridis', linewidth=0.4, ax=ax[0], legend=True, legend_kwds={'label': "Happiness Score"})
ax[0].set_title('Country Happiness Score')
ax[0].set_ylabel("Latitude")
ax[0].set_xlabel("Longitude")

# world_happiness_merged contains depression data too
world_depression_merged = world_happiness_merged

# "viridis_r" is Matplotlib's built-in reversed viridis. Using the registered name
# avoids plt.cm.get_cmap(), which was removed in Matplotlib 3.9.
reversed_map = "viridis_r"

world_depression_merged.plot(column='% of Pop with Depression', cmap=reversed_map, linewidth=0.4, ax=ax[1], legend=True, legend_kwds={'label': "% of Population with Depression"})
ax[1].set_title('Country % of Population with Depression')
ax[1].set_ylabel("Latitude")
ax[1].set_xlabel("Longitude")

plt.show()

# Show plot based on location and the happiness score, marked by color intensity

In [None]:

# Read the low-resolution Natural Earth country polygons bundled with geopandas
# NOTE(review): gpd.datasets is deprecated in recent geopandas releases - confirm the
# installed version still provides it, or point read_file at a local copy of the data.
world_maps = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# naturalearth_lowres is known to carry iso_a3 == "-99" for a few countries, France
# and Norway among them, which would silently drop them from the merge below.
# Setting the code explicitly is harmless if the dataset already has it right.
iso_a3_fixes = {"France": "FRA", "Norway": "NOR"}
for map_country_name, iso_code in iso_a3_fixes.items():
    world_maps.loc[world_maps["name"] == map_country_name, "iso_a3"] = iso_code

# Merge your data with the map data
world_happiness_merged = world_maps.merge(happiness_depression_df, left_on='iso_a3', right_on='Code_A3')

# Plot the map with colored tiles
fig, ax = plt.subplots(1, figsize=(12, 6))

world_happiness_merged.plot(column='Happiness Score', cmap='viridis', linewidth=0.4, ax=ax, legend=True, legend_kwds={'label': "Happiness Score"})
ax.set_title('Country Happiness Score')
ax.set_ylabel("Latitude")
ax.set_xlabel("Longitude")
plt.show()

# Show plot based on location and the % of Population with Depression, marked by color intensity

In [None]:
# Use previously read country map images

# world_happiness_merged contains depression data too
world_depression_merged = world_happiness_merged

# Plot the map with colored tiles
fig, ax = plt.subplots(1, figsize=(12, 6))

# "viridis_r" is Matplotlib's built-in reversed viridis. Using the registered name
# avoids plt.cm.get_cmap(), which was removed in Matplotlib 3.9.
reversed_map = "viridis_r"

world_depression_merged.plot(column='% of Pop with Depression', cmap=reversed_map, linewidth=0.4, ax=ax, legend=True, legend_kwds={'label': "% of Population with Depression"})
ax.set_title('Country % of Population with Depression')
ax.set_ylabel("Latitude")
ax.set_xlabel("Longitude")
plt.show()

# Display Happiness Score in box plots for Finland(top), Afghanistan(bottom) & Australia

In [None]:
# Collect every country's Happiness Scores, flag potential outliers with the
# 1.5 * IQR rule, then show box plots for Finland, Afghanistan and Australia.
country_scores = {}

for country in country_list:

    # Happiness Scores recorded for this country, re-indexed 0..n-1 so the
    # per-country series line up positionally in the combined frame
    score = world_happiness_df.loc[world_happiness_df["Country"] == country, "Happiness Score"].reset_index(drop=True)
    country_scores[country] = score

    # Determine outliers using upper and lower bounds
    quartiles = score.quantile([0.25, 0.5, 0.75])
    lowerq = quartiles[0.25]
    upperq = quartiles[0.75]
    iqr = upperq - lowerq

    lower_bound = lowerq - (1.5 * iqr)
    upper_bound = upperq + (1.5 * iqr)

    # Check both tails; the bounds come from this country's own scores only
    outliers = score.loc[(score < lower_bound) | (score > upper_bound)]
    print(f"{country} potential outliers: {outliers}")

# One column per country. Building the frame in a single step pads shorter
# histories with NaN instead of truncating every country to the length of the
# first one, which is what column-by-column assignment did.
happiness_df = pd.DataFrame(country_scores)

ax = happiness_df[["Finland", "Afghanistan", "Australia"]].plot(kind='box', sym='r')

ax.set_ylabel('Happiness Score')
plt.show()



In [None]:
# Create the per-year average of every numeric indicator across all countries,
# used to represent the world.
# numeric_only=True keeps the string "Country" column out of the mean
# (pandas >= 2.0 raises a TypeError otherwise).
average_happiness_year_df = world_happiness_df.groupby("Year").mean(numeric_only=True)
average_happiness_year_df.head()


# Display Happiness Score box plot for World, Australia, Finland

In [None]:
# Create box plots to compare the World, Australia and Finland

# "World" is the series of yearly all-country average Happiness Scores.
# Assigning a scalar to the column inside a loop would overwrite the whole column
# on every pass and leave a single repeated value, so the series is taken in one step.
world_scores = average_happiness_year_df["Happiness Score"].reset_index(drop=True)

# Align the three series by position; concat pads shorter ones with NaN rather
# than truncating. happiness_df itself is left unmodified, so the cell can be re-run.
comparison_df = pd.concat(
    [world_scores.rename("World"), happiness_df[["Australia", "Finland"]]],
    axis=1,
)

ax = comparison_df.plot(kind='box', sym='r')
ax.set_ylabel('Happiness Score')

plt.show()

In [None]:
# Define a function to create Linear Regression plots
def plot_linear_regression(main_df, xcolumn_str, ycolumn_str):
    """Scatter-plot two columns of a dataframe with a least-squares regression line.

    Draws the scatter, the fitted line and the line equation, prints the
    correlation coefficient (r) and the coefficient of determination (r squared),
    and shows the figure.

    Parameters
    ----------
    main_df : pandas.DataFrame
        Frame containing both columns.
    xcolumn_str : str
        Name of the column plotted on the x axis.
    ycolumn_str : str
        Name of the column plotted on the y axis.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If fewer than two rows have values in both columns.
    """
    # Only the two plotted columns must be complete. A blanket dropna() would also
    # throw away rows whose nulls sit in unrelated columns.
    plot_df = main_df.dropna(subset=[xcolumn_str, ycolumn_str])
    if len(plot_df) < 2:
        raise ValueError(
            f"Need at least two rows with both '{xcolumn_str}' and '{ycolumn_str}' to fit a line."
        )

    x_values = plot_df[xcolumn_str]
    y_values = plot_df[ycolumn_str]

    # Build the scatter plot, colouring points by their y value
    plt.scatter(x_values, y_values, marker='o', c=y_values)

    # Incorporate the other graph properties
    plt.title(f"{xcolumn_str} vs. {ycolumn_str}")
    plt.ylabel(ycolumn_str)
    plt.xlabel(xcolumn_str)

    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

    # Get regression values
    regress_values = x_values * slope + intercept

    # Create line equation string; the sign is handled explicitly so a negative
    # intercept reads "- 1.2" rather than "+-1.2"
    sign = "+" if intercept >= 0 else "-"
    line_eq = f"y = {round(slope, 2)}x {sign} {round(abs(intercept), 2)}"

    plt.plot(x_values, regress_values, "r-")

    # Annotate the line equation at the lower-left corner of the data
    x = x_values.min()
    y = y_values.min()
    plt.annotate(line_eq, (x, y), fontsize=15, color="red")

    # Report r and r squared under their correct names
    print(f"The r-value is: {rvalue}")
    print(f"The r-squared is: {rvalue**2}")

    # Show plot
    plt.show()

    return

# GDP p/capita vs Happiness Score

In [None]:
# Regression plot: GDP p/capita (x) against Happiness Score (y)
plot_linear_regression(
    main_df=happiness_depression_df,
    xcolumn_str="GDP p/capita",
    ycolumn_str="Happiness Score",
)


#### GDP p/capita vs Happiness Score has a moderate correlation.

# GDP p/capita vs % of Pop with Depression

In [None]:
# Regression plot: GDP p/capita (x) against % of Pop with Depression (y)
plot_linear_regression(
    main_df=happiness_depression_df,
    xcolumn_str="GDP p/capita",
    ycolumn_str="% of Pop with Depression",
)


#### GDP p/capita vs % of Pop with Depression has a weak correlation.

# Happiness Score vs % of Population with Depression

In [None]:
# Regression plot: Happiness Score (x) against % of Pop with Depression (y)
plot_linear_regression(
    main_df=happiness_depression_df,
    xcolumn_str="Happiness Score",
    ycolumn_str="% of Pop with Depression",
)

#### Happiness vs % of Population with Depression has very weak or no correlation at all.