# Does attaining a higher level of education correlate to lower cause of death prevalence?

In [1]:
# Dependencies and Setup
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px 
import seaborn as sns 
import plotly.graph_objects as go
import plotly.io as pio

  from pandas.core import (


In [2]:
# study data files
edu_data = pd.read_csv("C:/Users/Beyonka/Project-3/edu_2020_df.csv")
GBD_data = pd.read_csv("C:/Users/Beyonka/Project-3/GBD 15-69.csv")

# Rename the column
edu_data.rename(columns= {'Avg.Ed...Years.': 'Avg. Ed'}, inplace=True)
edu_data.rename(columns= {'Entity': 'Location'}, inplace=True)
GBD_data.rename(columns={'Road.Injuries': 'Road Injuries'}, inplace=True)
GBD_data.rename(columns= {'Ischemic.Heart.Disease': 'Ischemic Heart Disease'}, inplace=True)


In [3]:
edu_data.head()


Unnamed: 0.1,Unnamed: 0,X,Location,Code,Year,Avg. Ed
0,1,1,Afghanistan,AFG,2020,5.69
1,2,2,Africa,,2020,6.681866
2,3,3,Albania,ALB,2020,10.32
3,4,4,Algeria,DZA,2020,8.18
4,5,5,Argentina,ARG,2020,9.86


In [4]:
print(edu_data.columns)


Index(['Unnamed: 0', 'X', 'Location', 'Code', 'Year', 'Avg. Ed'], dtype='object')


In [5]:
# Cleaned columns that are not needed
edu_data.drop(columns=['Unnamed: 0', 'X', 'Year', 'Code'], inplace=True)


In [6]:
edu_data.head()

Unnamed: 0,Location,Avg. Ed
0,Afghanistan,5.69
1,Africa,6.681866
2,Albania,10.32
3,Algeria,8.18
4,Argentina,9.86


In [7]:
GBD_data.head()

Unnamed: 0.1,Unnamed: 0,X,Location,Year,Age,Sex,Measure,Road Injuries,HIV,Ischemic Heart Disease
0,1,1,Afghanistan,2019,15-69 years,Both,Percent of total deaths,0.066641,0.002164,0.179838
1,2,2,Angola,2019,15-69 years,Both,Percent of total deaths,0.082701,0.195853,0.0504
2,3,3,Albania,2019,15-69 years,Both,Percent of total deaths,0.06838,0.000502,0.173301
3,4,4,Andorra,2019,15-69 years,Both,Percent of total deaths,0.076438,0.026947,0.06615
4,5,5,United Arab Emirates,2019,15-69 years,Both,Percent of total deaths,0.152665,0.005384,0.156873


In [8]:
print(GBD_data.columns)

Index(['Unnamed: 0', 'X', 'Location', 'Year', 'Age', 'Sex', 'Measure',
       'Road Injuries', 'HIV', 'Ischemic Heart Disease'],
      dtype='object')


In [9]:
# Cleaned columns that are not needed
GBD_data.drop(columns=['Unnamed: 0', 'X', 'Year', 'Age', 'Sex'], inplace=True)

In [10]:
GBD_data.head()

Unnamed: 0,Location,Measure,Road Injuries,HIV,Ischemic Heart Disease
0,Afghanistan,Percent of total deaths,0.066641,0.002164,0.179838
1,Angola,Percent of total deaths,0.082701,0.195853,0.0504
2,Albania,Percent of total deaths,0.06838,0.000502,0.173301
3,Andorra,Percent of total deaths,0.076438,0.026947,0.06615
4,United Arab Emirates,Percent of total deaths,0.152665,0.005384,0.156873


In [11]:
# Drop rows with missing values
cleaned_edu = edu_data.dropna()

# Drop rows with missing values
cleaned_GBD = GBD_data.dropna()

In [12]:
cleaned_edu

Unnamed: 0,Location,Avg. Ed
0,Afghanistan,5.690000
1,Africa,6.681866
2,Albania,10.320000
3,Algeria,8.180000
4,Argentina,9.860000
...,...,...
148,Vietnam,8.350000
149,World,8.840671
150,Yemen,5.560000
151,Zambia,8.370000


In [13]:
cleaned_GBD

Unnamed: 0,Location,Measure,Road Injuries,HIV,Ischemic Heart Disease
0,Afghanistan,Percent of total deaths,0.066641,0.002164,0.179838
1,Angola,Percent of total deaths,0.082701,0.195853,0.050400
2,Albania,Percent of total deaths,0.068380,0.000502,0.173301
3,Andorra,Percent of total deaths,0.076438,0.026947,0.066150
4,United Arab Emirates,Percent of total deaths,0.152665,0.005384,0.156873
...,...,...,...,...,...
199,Zambia,Percent of total deaths,0.026893,0.299726,0.037537
200,Zimbabwe,Percent of total deaths,0.030763,0.266965,0.051816
201,Egypt,Percent of total deaths,0.130131,0.000338,0.270220
202,Sudan,Percent of total deaths,0.076786,0.079577,0.216912


In [14]:
# Merge the datasets based on the 'Location' column
# Merge DataFrames using full outer join
data = pd.merge(cleaned_edu, cleaned_GBD, on='Location', how='outer')

data

Unnamed: 0,Location,Avg. Ed,Measure,Road Injuries,HIV,Ischemic Heart Disease
0,Afghanistan,5.690000,Percent of total deaths,0.066641,0.002164,0.179838
1,Africa,6.681866,,,,
2,Albania,10.320000,Percent of total deaths,0.068380,0.000502,0.173301
3,Algeria,8.180000,Percent of total deaths,0.162423,0.004090,0.219362
4,American Samoa,,Percent of total deaths,0.033994,0.003513,0.146465
...,...,...,...,...,...,...
224,Vietnam,8.350000,,,,
225,World,8.840671,,,,
226,Yemen,5.560000,Percent of total deaths,0.121017,0.003649,0.189279
227,Zambia,8.370000,Percent of total deaths,0.026893,0.299726,0.037537


In [15]:
# Drop rows with NaN values
data.dropna(inplace=True)
data

Unnamed: 0,Location,Avg. Ed,Measure,Road Injuries,HIV,Ischemic Heart Disease
0,Afghanistan,5.69,Percent of total deaths,0.066641,0.002164,0.179838
2,Albania,10.32,Percent of total deaths,0.068380,0.000502,0.173301
3,Algeria,8.18,Percent of total deaths,0.162423,0.004090,0.219362
8,Argentina,9.86,Percent of total deaths,0.079144,0.026741,0.078703
9,Armenia,10.54,Percent of total deaths,0.048349,0.005419,0.198225
...,...,...,...,...,...,...
213,United Kingdom,12.90,Percent of total deaths,0.034596,0.003553,0.100794
218,Uruguay,8.76,Percent of total deaths,0.080525,0.031036,0.066344
226,Yemen,5.56,Percent of total deaths,0.121017,0.003649,0.189279
227,Zambia,8.37,Percent of total deaths,0.026893,0.299726,0.037537


In [16]:
# Assuming 'data' is your DataFrame
fig_1 = px.scatter(data, y='Avg. Ed', x='Road Injuries', trendline='ols', hover_name='Location', trendline_color_override='red')

# Update layout to add title
fig_1.update_layout(
    title={'text':'Road Injuries vs. Average Years of Education by Location','x': 0.5,
        'xanchor': 'center'})

# Show the plot
fig_1.show()

In [17]:
# Assuming 'data' is your DataFrame
fig_2 = px.scatter(data, y='Avg. Ed', x='HIV', trendline='ols', hover_name='Location',trendline_color_override='red')

# Update layout to add title
fig_2.update_layout(title={'text':'HIV vs. Average Years of Education by Location', 'x': 0.5, 'xanchor': 'center'})

# Show the plot
fig_2.show()

In [18]:
# Assuming 'data' is your DataFrame
fig_3 = px.scatter(data, y='Avg. Ed', x='Ischemic Heart Disease', hover_name='Location', trendline_color_override='red', trendline='ols')

# Update layout to add title
fig_3.update_layout(title={'text':'Ischemic Heart Disease vs. Average Years of Education by Location', 'x': 0.5, 'xanchor': 'center'})

# Show the plot
fig_3.show()

In [19]:
# Save the plots as HTML files
pio.write_html(fig_1,"Relationship_avg_ed_road_injuries.html")
pio.write_html(fig_2, "Relationship_avg_ed_hiv.html")
pio.write_html(fig_3, "Relationship_avg_ed_heart_disease.html")

In [20]:
# Save graph as PNG image
# pio.write_image(fig_1,"Road Injuries vs. Average Education by Location.png")
# pio.write_image(fig_2,"HIV vs. Average Education by Location.png")
# pio.write_image(fig_3,"Ischemic Heart Diesease vs. Average Education by Location.png")

In [22]:
# Getting the JSON data

json_data = pio.to_json(fig_1)
json_data

'{"data":[{"hovertemplate":"<b>%{hovertext}</b><br><br>Road Injuries=%{x}<br>Avg. Ed=%{y}<extra></extra>","hovertext":["Afghanistan","Albania","Algeria","Argentina","Armenia","Australia","Austria","Bahrain","Bangladesh","Barbados","Belgium","Belize","Benin","Botswana","Brazil","Bulgaria","Burundi","Cambodia","Cameroon","Canada","Central African Republic","Chile","China","Colombia","Congo","Costa Rica","Croatia","Cuba","Cyprus","Czechia","Denmark","Dominican Republic","Ecuador","Egypt","El Salvador","Estonia","Eswatini","Fiji","Finland","France","Gabon","Gambia","Germany","Ghana","Greece","Guatemala","Guyana","Haiti","Honduras","Hungary","Iceland","India","Indonesia","Iraq","Ireland","Israel","Italy","Jamaica","Japan","Jordan","Kazakhstan","Kenya","Kuwait","Kyrgyzstan","Latvia","Lesotho","Liberia","Libya","Lithuania","Luxembourg","Malawi","Malaysia","Maldives","Mali","Malta","Mauritania","Mauritius","Mexico","Mongolia","Morocco","Mozambique","Myanmar","Namibia","Nepal","Netherlands","Ne

In [23]:
json_data = pio.to_json(fig_2)
json_data

'{"data":[{"hovertemplate":"<b>%{hovertext}</b><br><br>HIV=%{x}<br>Avg. Ed=%{y}<extra></extra>","hovertext":["Afghanistan","Albania","Algeria","Argentina","Armenia","Australia","Austria","Bahrain","Bangladesh","Barbados","Belgium","Belize","Benin","Botswana","Brazil","Bulgaria","Burundi","Cambodia","Cameroon","Canada","Central African Republic","Chile","China","Colombia","Congo","Costa Rica","Croatia","Cuba","Cyprus","Czechia","Denmark","Dominican Republic","Ecuador","Egypt","El Salvador","Estonia","Eswatini","Fiji","Finland","France","Gabon","Gambia","Germany","Ghana","Greece","Guatemala","Guyana","Haiti","Honduras","Hungary","Iceland","India","Indonesia","Iraq","Ireland","Israel","Italy","Jamaica","Japan","Jordan","Kazakhstan","Kenya","Kuwait","Kyrgyzstan","Latvia","Lesotho","Liberia","Libya","Lithuania","Luxembourg","Malawi","Malaysia","Maldives","Mali","Malta","Mauritania","Mauritius","Mexico","Mongolia","Morocco","Mozambique","Myanmar","Namibia","Nepal","Netherlands","New Zealand"

In [24]:
json_data = pio.to_json(fig_3)
json_data

'{"data":[{"hovertemplate":"<b>%{hovertext}</b><br><br>Ischemic Heart Disease=%{x}<br>Avg. Ed=%{y}<extra></extra>","hovertext":["Afghanistan","Albania","Algeria","Argentina","Armenia","Australia","Austria","Bahrain","Bangladesh","Barbados","Belgium","Belize","Benin","Botswana","Brazil","Bulgaria","Burundi","Cambodia","Cameroon","Canada","Central African Republic","Chile","China","Colombia","Congo","Costa Rica","Croatia","Cuba","Cyprus","Czechia","Denmark","Dominican Republic","Ecuador","Egypt","El Salvador","Estonia","Eswatini","Fiji","Finland","France","Gabon","Gambia","Germany","Ghana","Greece","Guatemala","Guyana","Haiti","Honduras","Hungary","Iceland","India","Indonesia","Iraq","Ireland","Israel","Italy","Jamaica","Japan","Jordan","Kazakhstan","Kenya","Kuwait","Kyrgyzstan","Latvia","Lesotho","Liberia","Libya","Lithuania","Luxembourg","Malawi","Malaysia","Maldives","Mali","Malta","Mauritania","Mauritius","Mexico","Mongolia","Morocco","Mozambique","Myanmar","Namibia","Nepal","Netherl