In [228]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt 
import requests
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [229]:
# Select the matplotlib backend for this notebook.
# NOTE(review): two backends are requested back to back; presumably the later
# `widget` selection is the one left active — confirm which one is intended.
# The plotting cells visible in this notebook all use Plotly, not matplotlib.
%matplotlib inline
# `widget` is the interactive backend (provided by ipympl).
%matplotlib widget

In [230]:
# Locations of the two input CSV files.
# The directory was previously fixed to one user's Desktop. It can now be
# overridden with the GBDWHR_DATA_DIR environment variable so the notebook can
# run on another machine; when the variable is unset the original directory is
# used, so the default paths are the same as before.
_default_csv_dir = r"/Users/lnovo_crisissupport/Desktop/SMU Data Analytics BC/Project-3/csv"
csv_dir = os.environ.get("GBDWHR_DATA_DIR", _default_csv_dir)

gbd_file_path = os.path.join(csv_dir, "GBD 15-69.csv")
whr_file_path = os.path.join(csv_dir, "happiness_condensed.csv")

In [231]:
# Load the two source CSV files into pandas DataFrames:
# gbd_df from gbd_file_path and whr_df from whr_file_path.
gbd_df = pd.read_csv(filepath_or_buffer=gbd_file_path)
whr_df = pd.read_csv(filepath_or_buffer=whr_file_path)

In [232]:
# Restructure the GBD data: replace the dotted cause-of-death column names with
# readable ones and remove the 'Year', 'Age' and 'Sex' columns.
# This is written as a non-inplace chain, with errors='ignore' on the drop, so
# the cell can be re-run on the same kernel. The previous in-place drop raised
# KeyError on a second run because the three columns were already gone.
gbd_df = (
    gbd_df
    .rename(columns={
        'Road.Injuries': 'Road Injuries',
        'Ischemic.Heart.Disease': 'Ischemic Heart Disease',
    })
    .drop(columns=['Year', 'Age', 'Sex'], errors='ignore')
)

# Drop rows with missing values from each dataset before merging
clean_gbd = gbd_df.dropna()
clean_whr = whr_df.dropna()

# Full outer join on 'Location': a location present in only one dataset is kept,
# with NaN in the columns that come from the other dataset.
GBDWHR_df = pd.merge(clean_gbd, clean_whr, on='Location', how='outer')

In [233]:
# Show the outer-merged table first; locations found in only one dataset still
# appear here with NaN values.
print(GBDWHR_df)
# Keep only rows without any missing value, i.e. locations matched in both
# datasets. The name is rebound instead of using inplace=True, so the frame
# that was just printed is not modified behind the reader's back.
GBDWHR_df = GBDWHR_df.dropna()

                    Location                  Measure  Road Injuries  \
0                Afghanistan  Percent of total deaths       0.066641   
1                     Angola  Percent of total deaths       0.082701   
2                    Albania  Percent of total deaths       0.068380   
3                    Andorra  Percent of total deaths       0.076438   
4       United Arab Emirates  Percent of total deaths       0.152665   
..                       ...                      ...            ...   
220                     Iran                      NaN            NaN   
221  Palestinian Territories                      NaN            NaN   
222         Congo (Kinshasa)                      NaN            NaN   
223                Swaziland                      NaN            NaN   
224                 Tanzania                      NaN            NaN   

          HIV  Ischemic Heart Disease  Logged GDP per capita  Social support  \
0    0.002164                0.179838               7.4

In [234]:
# Write the merged table to 'GBDWHR.csv' (relative to the working directory),
# leaving the DataFrame index out of the file.
GBDWHR_df.to_csv(path_or_buf='GBDWHR.csv', index=False)

In [235]:
# WHR happiness measures; each one is plotted on the y-axis of its own figure
variables = ["Freedom to make life choices", "Healthy life expectancy", "Social support", "Logged GDP per capita"]

# Figures are collected here so they can all be shown once the loop is done
figs = []

# One scatter plot per measure, with "Road Injuries" on the x-axis
for variable in variables:
    # OLS trendline requested with the log_x option enabled
    figAnalysis1 = px.scatter(
        GBDWHR_df,
        x="Road Injuries",
        y=variable,
        hover_name="Location",
        trendline="ols",
        trendline_options={"log_x": True},
    )
    layout_settings = {
        "title": f"Scatterplot of Road Injury Death Rates and {variable}",
        "xaxis_title": "Road Injuries",
        "yaxis_title": variable,
        "title_x": 0.5,   # horizontal title position (centered)
        "title_y": .95,   # vertical title position
        "width": 700,     # figure width
        "height": 300,    # figure height
    }
    figAnalysis1.update_layout(**layout_settings)
    figs.append(figAnalysis1)

# Render every collected figure, in the order the measures are listed
for figAnalysis1 in figs:
    figAnalysis1.show()

In [236]:
# WHR happiness measures; each one is plotted against the HIV column
variables = ["Freedom to make life choices", "Healthy life expectancy", "Social support", "Logged GDP per capita"]

# Store the figure objects
figs = []

# Iterate over each variable and create the scatter plot
for variable in variables:
    # NOTE(review): unlike the Road Injuries and Ischemic Heart Disease cells,
    # no log_x trendline option is passed here — confirm this is intentional.
    figAnalysis2 = px.scatter(GBDWHR_df, x="HIV", y=variable, hover_name="Location", trendline="ols")
    figAnalysis2.update_layout(
        title=f"Scatterplot HIV death rates and {variable}",
        # Fixed: the x-axis shows the "HIV" column, but the label had been
        # copied from the Road Injuries cell and read "Road Injuries".
        xaxis_title="HIV",
        yaxis_title=variable,
        title_x=0.5,  # Center the title horizontally
        title_y=.95,
        width=600,   # Set the width of the plot
        height=300   # Set the height of the plot
    )
    figs.append(figAnalysis2)

# Show the plots
for figAnalysis2 in figs:
    figAnalysis2.show()

In [237]:
# WHR happiness measures; each one is plotted on the y-axis of its own figure
variables = ["Freedom to make life choices", "Healthy life expectancy", "Social support", "Logged GDP per capita"]

# Figures are collected here so they can all be shown once the loop is done
figs = []

# One scatter plot per measure, with "Ischemic Heart Disease" on the x-axis
for variable in variables:
    # OLS trendline requested with the log_x option enabled
    figAnalysis3 = px.scatter(
        GBDWHR_df,
        x="Ischemic Heart Disease",
        y=variable,
        hover_name="Location",
        trendline="ols",
        trendline_options={"log_x": True},
    )
    layout_settings = {
        "title": f"Scatterplot IHD death rates and {variable}",
        "xaxis_title": "Ischemic Heart Disease",
        "yaxis_title": variable,
        "title_x": 0.5,   # horizontal title position (centered)
        "title_y": .95,   # vertical title position
        "width": 600,     # figure width
        "height": 300,    # figure height
    }
    figAnalysis3.update_layout(**layout_settings)
    figs.append(figAnalysis3)

# Render every collected figure, in the order the measures are listed
for figAnalysis3 in figs:
    figAnalysis3.show()

In [238]:
# WHR happiness measures; each one gets its own panel in a 2x2 grid
variables = ["Freedom to make life choices", "Healthy life expectancy", "Social support", "Logged GDP per capita"]

# Per-panel size, reused for the grid sizing lists and the overall figure size
width = 500
height = 400

# Build the empty 2x2 grid, one titled panel per measure
panel_titles = [f"Road-Injury Death Rates and {var}" for var in variables]
figCompare1 = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=panel_titles,
    column_widths=[width, width],
    row_heights=[height, height],
)

# Add one markers-only trace per measure, filling the grid row by row
for i, variable in enumerate(variables):
    # i = 0..3 maps to grid cells (1,1), (1,2), (2,1), (2,2)
    grid_row, grid_col = divmod(i, 2)
    hover_text = [
        f"{variable}: {y}<br>Location: {location}"
        for y, location in zip(GBDWHR_df[variable], GBDWHR_df['Location'])
    ]
    scatter = go.Scatter(
        x=GBDWHR_df["Road Injuries"],
        y=GBDWHR_df[variable],
        mode="markers",
        hoverinfo="text",   # hover shows only the custom text built above
        text=hover_text,
    )
    figCompare1.add_trace(scatter, row=grid_row + 1, col=grid_col + 1)

# Figure-level layout.
# NOTE(review): `xaxis_title` appears to label only the first panel's x-axis;
# the other panels stay unlabeled — confirm that is intended.
figure_layout = dict(
    title_text="Scatterplots of Road-Injury Death Rates and WHR Measurements of Happiness (y-axis)",
    xaxis_title="Road Injuries",
    showlegend=False,
    width=2 * width,        # overall figure width
    height=1.5 * height,    # overall figure height
    title_x=0.5,            # title centered horizontally...
    title_xanchor="center", # ...and anchored at its center
)
figCompare1.update_layout(**figure_layout)

# Render the grid
figCompare1.show()

In [239]:
# WHR happiness measures; each one gets its own panel in a 2x2 grid
variables = ["Freedom to make life choices", "Healthy life expectancy", "Social support", "Logged GDP per capita"]

# Per-panel size, reused for the grid sizing lists and the overall figure size
width = 450
height = 400

# Build the empty 2x2 grid, one titled panel per measure
panel_titles = [f"HIV death rates and {var}" for var in variables]
figCompare2 = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=panel_titles,
    column_widths=[width, width],
    row_heights=[height, height],
)

# Add one markers-only trace per measure, filling the grid row by row
for i, variable in enumerate(variables):
    # i = 0..3 maps to grid cells (1,1), (1,2), (2,1), (2,2)
    grid_row, grid_col = divmod(i, 2)
    hover_text = [
        f"{variable}: {y}<br>Location: {location}"
        for y, location in zip(GBDWHR_df[variable], GBDWHR_df['Location'])
    ]
    scatter = go.Scatter(
        x=GBDWHR_df["HIV"],
        y=GBDWHR_df[variable],
        mode="markers",
        hoverinfo="text",   # hover shows only the custom text built above
        text=hover_text,
    )
    figCompare2.add_trace(scatter, row=grid_row + 1, col=grid_col + 1)

# Figure-level layout.
# NOTE(review): `xaxis_title` appears to label only the first panel's x-axis;
# the other panels stay unlabeled — confirm that is intended.
figure_layout = dict(
    title_text="Scatterplots of HIV death rates and WHR Measurements of Happiness (y-axis)",
    xaxis_title="HIV",
    showlegend=False,
    width=2 * width,        # overall figure width
    height=1.5 * height,    # overall figure height
    title_x=0.5,            # title centered horizontally...
    title_xanchor="center", # ...and anchored at its center
)
figCompare2.update_layout(**figure_layout)

# Render the grid
figCompare2.show()

In [240]:
# WHR happiness measures; each one gets its own panel in a 2x2 grid
variables = ["Freedom to make life choices", "Healthy life expectancy", "Social support", "Logged GDP per capita"]

# Per-panel size, reused for the grid sizing lists and the overall figure size
width = 450
height = 400

# Build the empty 2x2 grid, one titled panel per measure
panel_titles = [f"IHD death rates and {var}" for var in variables]
figCompare3 = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=panel_titles,
    column_widths=[width, width],
    row_heights=[height, height],
)

# Add one markers-only trace per measure, filling the grid row by row
for i, variable in enumerate(variables):
    # i = 0..3 maps to grid cells (1,1), (1,2), (2,1), (2,2)
    grid_row, grid_col = divmod(i, 2)
    hover_text = [
        f"{variable}: {y}<br>Location: {location}"
        for y, location in zip(GBDWHR_df[variable], GBDWHR_df['Location'])
    ]
    scatter = go.Scatter(
        x=GBDWHR_df["Ischemic Heart Disease"],
        y=GBDWHR_df[variable],
        mode="markers",
        hoverinfo="text",   # hover shows only the custom text built above
        text=hover_text,
    )
    figCompare3.add_trace(scatter, row=grid_row + 1, col=grid_col + 1)

# Figure-level layout.
# NOTE(review): `xaxis_title` appears to label only the first panel's x-axis;
# the other panels stay unlabeled — confirm that is intended.
figure_layout = dict(
    title_text="Scatterplots of Ischemic Heart Disease death rates and WHR Measurements of Happiness (y-axis)",
    xaxis_title="Ischemic Heart Disease",
    showlegend=False,
    width=2 * width,        # overall figure width
    height=1.5 * height,    # overall figure height
    title_x=0.5,            # title centered horizontally...
    title_xanchor="center", # ...and anchored at its center
)
figCompare3.update_layout(**figure_layout)

# Render the grid
figCompare3.show()

In [241]:
# Columns used as dimensions of the scatter matrix: three cause-of-death
# columns followed by four WHR measures
numeric_cols = ["Road Injuries", "HIV", "Ischemic Heart Disease", "Logged GDP per capita",
                "Social support", "Healthy life expectancy", "Freedom to make life choices"]

# Coerce those columns to numeric; values that cannot be parsed become NaN
GBDWHR_df[numeric_cols] = GBDWHR_df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# One hover string per row: every numeric column as "name: value", each
# followed by an HTML line break, then the row's location
hover_text = [
    "".join(f"{col}: {row[col]}<br>" for col in numeric_cols) + f"Location: {row['Location']}"
    for index, row in GBDWHR_df.iterrows()
]

# Scatter-matrix trace showing only the lower triangle
splom_trace = go.Splom(
    dimensions=[dict(label=col, values=GBDWHR_df[col]) for col in numeric_cols],
    text=hover_text,
    marker=dict(color='blue', showscale=False),
    showupperhalf=False,
    showlowerhalf=True,
)
figScatter = go.Figure(data=splom_trace)

# Figure-level layout
scatter_layout = dict(
    title='Top 3 Risks of Death (rates) against WHR Measurements of Happiness',
    hovermode='closest',     # hover reports the nearest point
    dragmode='select',       # dragging draws a selection box
    width=1150,
    height=1345,
    title_x=0.5,             # title centered horizontally...
    title_xanchor='center',  # ...and anchored at its center
)
figScatter.update_layout(**scatter_layout)

# Hide the diagonal panels (each dimension against itself)
figScatter.update_traces(diagonal_visible=False)

# Render the scatter matrix
figScatter.show()

In [242]:
# Export the selected figures as HTML files in the working directory:
# the scatter matrix first, then the three 2x2 comparison grids.
html_exports = [
    (figScatter, "3 Risks v WHR Scatter Matrix.html"),
    (figCompare1, "Comparison 1.html"),
    (figCompare2, "Comparison 2.html"),
    (figCompare3, "Comparison 3.html"),
]
for fig_to_save, html_name in html_exports:
    pio.write_html(fig_to_save, html_name)

In [243]:
# Serialize the scatter-matrix figure (figScatter) to a JSON string.
# The bare name on the last line makes the notebook display the string.
json_7 = pio.to_json(fig=figScatter)
json_7


'{"data":[{"dimensions":[{"label":"Road Injuries","values":[0.066641327,0.06838024,0.15266505,0.079143662,0.048349224,0.059117539,0.050365923,0.038269098,0.041683861,0.06704812,0.042746516,0.055095026,0.028538244,0.037427134,0.097386102,0.033934445,0.037829355,0.086101656,0.034871225,0.071011947,0.055790023,0.0403709,0.083013317,0.050995562,0.080120402,0.055547256,0.107248144,0.098016312,0.045764327,0.044368224,0.096074618,0.162422905,0.127797705,0.050841039,0.028575966,0.031542352,0.034215758,0.05991004,0.083831834,0.034596084,0.075373431,0.051857603,0.035918195,0.030919251,0.09551369,0.056673057,0.051319469,0.064738773,0.02414776,0.03861593,0.055434579,0.057327416,0.034484986,0.068001274,0.037279889,0.068584453,0.073457327,0.028279817,0.099227476,0.033480451,0.053265494,0.022197937,0.076460344,0.067473322,0.123313694,0.05591117,0.027606643,0.138825952,0.06817149,0.036598253,0.032398383,0.061344137,0.036885324,0.122015526,0.031369023,0.041648061,0.070708359,0.028074999,0.03944543,0.03

In [244]:
# Serialize the Road-Injury comparison grid (figCompare1) to a JSON string.
# The bare name on the last line makes the notebook display the string.
json_8 = pio.to_json(fig=figCompare1)
json_8


'{"data":[{"hoverinfo":"text","mode":"markers","text":["Freedom to make life choices: 0.396573007<br>Location: Afghanistan","Freedom to make life choices: 0.781994224<br>Location: Albania","Freedom to make life choices: 0.941345513<br>Location: United Arab Emirates","Freedom to make life choices: 0.831132412<br>Location: Argentina","Freedom to make life choices: 0.712017834<br>Location: Armenia","Freedom to make life choices: 0.915431738<br>Location: Australia","Freedom to make life choices: 0.899989486<br>Location: Austria","Freedom to make life choices: 0.786824107<br>Location: Azerbaijan","Freedom to make life choices: 0.626350224<br>Location: Burundi","Freedom to make life choices: 0.813582063<br>Location: Belgium","Freedom to make life choices: 0.735182583<br>Location: Benin","Freedom to make life choices: 0.665563583<br>Location: Burkina Faso","Freedom to make life choices: 0.90062499<br>Location: Bangladesh","Freedom to make life choices: 0.745178163<br>Location: Bulgaria","Free

In [245]:
# Serialize the HIV comparison grid (figCompare2) to a JSON string.
# The bare name on the last line makes the notebook display the string.
json_9 = pio.to_json(fig=figCompare2)
json_9

'{"data":[{"hoverinfo":"text","mode":"markers","text":["Freedom to make life choices: 0.396573007<br>Location: Afghanistan","Freedom to make life choices: 0.781994224<br>Location: Albania","Freedom to make life choices: 0.941345513<br>Location: United Arab Emirates","Freedom to make life choices: 0.831132412<br>Location: Argentina","Freedom to make life choices: 0.712017834<br>Location: Armenia","Freedom to make life choices: 0.915431738<br>Location: Australia","Freedom to make life choices: 0.899989486<br>Location: Austria","Freedom to make life choices: 0.786824107<br>Location: Azerbaijan","Freedom to make life choices: 0.626350224<br>Location: Burundi","Freedom to make life choices: 0.813582063<br>Location: Belgium","Freedom to make life choices: 0.735182583<br>Location: Benin","Freedom to make life choices: 0.665563583<br>Location: Burkina Faso","Freedom to make life choices: 0.90062499<br>Location: Bangladesh","Freedom to make life choices: 0.745178163<br>Location: Bulgaria","Free

In [246]:
# Serialize the Ischemic Heart Disease comparison grid (figCompare3) to a JSON string.
# The bare name on the last line makes the notebook display the string.
json_10 = pio.to_json(fig=figCompare3)
json_10

'{"data":[{"hoverinfo":"text","mode":"markers","text":["Freedom to make life choices: 0.396573007<br>Location: Afghanistan","Freedom to make life choices: 0.781994224<br>Location: Albania","Freedom to make life choices: 0.941345513<br>Location: United Arab Emirates","Freedom to make life choices: 0.831132412<br>Location: Argentina","Freedom to make life choices: 0.712017834<br>Location: Armenia","Freedom to make life choices: 0.915431738<br>Location: Australia","Freedom to make life choices: 0.899989486<br>Location: Austria","Freedom to make life choices: 0.786824107<br>Location: Azerbaijan","Freedom to make life choices: 0.626350224<br>Location: Burundi","Freedom to make life choices: 0.813582063<br>Location: Belgium","Freedom to make life choices: 0.735182583<br>Location: Benin","Freedom to make life choices: 0.665563583<br>Location: Burkina Faso","Freedom to make life choices: 0.90062499<br>Location: Bangladesh","Freedom to make life choices: 0.745178163<br>Location: Bulgaria","Free

In [247]:
# Save to PNG (currently disabled; uncomment the lines below to export static images)
# pio.write_image(figScatter,"3 Risks v WHR Scatter Matrix.png")
# pio.write_image(figCompare1,"Compare 1.png")
# pio.write_image(figCompare2,"Compare 2.png")
# pio.write_image(figCompare3,"Compare 3.png")