In [22]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect
import sqlite3
import pandas as pd


In [2]:
# Create engine using the sqlite database file
# The URL is relative ("./database/..."), so it resolves against the
# notebook's current working directory.
engine = create_engine("sqlite:///./database/project3_group6.sqlite")

In [3]:
# Reflect Database into ORM classes
# prepare(autoload_with=engine) reads the schema from the database behind
# `engine` and generates the mapped classes on `Base.classes`.
Base = automap_base()
Base.prepare(autoload_with=engine)
# Display the names of the classes that automap generated.
Base.classes.keys()

['death_rates', 'fast_food', 'us_states', 'gdp_state', 'overweight_obesity']

In [4]:
# Map classes
# Bind each reflected table to a module-level name matching the table name.
death_rates = Base.classes.death_rates
fast_food = Base.classes.fast_food
# us_states must point at the us_states table; it was previously bound to
# fast_food by a copy-paste slip, so any ORM query on `us_states` would have
# silently read fast-food rows.
us_states = Base.classes.us_states
gdp_state = Base.classes.gdp_state
overweight_obesity = Base.classes.overweight_obesity

In [5]:
# create a session
# ORM session bound to `engine`.
# NOTE(review): no visible cell uses or closes `session` - confirm whether it
# is still needed.
session = Session(engine)

In [6]:
# Create the inspector and connect it to the engine
# The inspector is used in the next cell to list the table names.
inspector = inspect(engine)

In [7]:
# List every table name the inspector reports for this database.
inspector.get_table_names()

['death_rates',
 'fast_food',
 'gdp_state',
 'overweight_obesity',
 'sqlite_sequence',
 'us_states']

In [13]:
# Load the whole us_states table into a DataFrame and display it.
us_states_df = pd.read_sql_table(table_name='us_states', con=engine)
us_states_df

Unnamed: 0,state_code,state_name
0,DE,Delaware
1,KS,Kansas
2,LA,Louisiana
3,KY,Kentucky
4,NY,New York
5,PA,Pennsylvania
6,GU,Guam
7,IN,Indiana
8,PR,Puerto Rico
9,ID,Idaho


In [9]:
# Load the overweight_obesity table into a DataFrame and preview the first ten rows.
overweight_obesity_df = pd.read_sql_table(table_name='overweight_obesity', con=engine)
overweight_obesity_df.head(n=10)

Unnamed: 0,ID,date_year,state_code,response,break_out,break_out_category,sample_size,data_value,latitude,longitude
0,1,2019,DE,Obese (BMI 30.0 - 99.8),55-64,Age Group,299,40.1,39.008831,-75.577741
1,2,2019,KS,Obese (BMI 30.0 - 99.8),25-34,Age Group,386,34.3,38.34774,-98.200781
2,3,2019,LA,Obese (BMI 30.0 - 99.8),55-64,Age Group,395,41.9,31.312661,-92.44568
3,4,2019,KY,Overweight (BMI 25.0-29.9),35-44,Age Group,294,35.6,37.64597,-84.774971
4,5,2019,NY,Overweight (BMI 25.0-29.9),45-54,Age Group,713,37.2,42.827001,-75.54397
5,6,2019,PA,Obese (BMI 30.0 - 99.8),18-24,Age Group,94,23.3,40.79373,-77.8607
6,7,2019,GU,Obese (BMI 30.0 - 99.8),18-24,Age Group,39,22.2,13.444304,144.793731
7,8,2014,GU,Obese (BMI 30.0 - 99.8),25-34,Age Group,148,34.6,13.444304,144.793731
8,9,2014,IN,Overweight (BMI 25.0-29.9),25-34,Age Group,249,32.8,39.76691,-86.14996
9,10,2014,PR,Overweight (BMI 25.0-29.9),45-54,Age Group,401,42.7,18.220833,-66.590149


In [11]:
# Keep only the location columns and collapse repeated rows, leaving one row
# per distinct (state_code, latitude, longitude) combination.
state_lat_long_df = (
    overweight_obesity_df
    .loc[:, ['state_code', 'latitude', 'longitude']]
    .drop_duplicates()
)
state_lat_long_df.head()

Unnamed: 0,state_code,latitude,longitude
0,DE,39.008831,-75.577741
1,KS,38.34774,-98.200781
2,LA,31.312661,-92.44568
3,KY,37.64597,-84.774971
4,NY,42.827001,-75.54397


In [33]:
# Attach the full state name to each state's coordinates, then keep a single
# row per state name.
# drop_duplicates()/reset_index() return new frames; their result has to be
# assigned back, otherwise the de-duplication is silently discarded.
merged_df = (
    pd.merge(state_lat_long_df, us_states_df, on='state_code', how='left')
    .drop_duplicates(subset=['state_name'])
    .reset_index(drop=True)
)
merged_df.head(20)

Unnamed: 0,state_code,latitude,longitude,state_name
0,DE,39.008831,-75.577741,Delaware
1,KS,38.34774,-98.200781,Kansas
2,LA,31.312661,-92.44568,Louisiana
3,KY,37.64597,-84.774971,Kentucky
4,NY,42.827001,-75.54397,New York
5,PA,40.79373,-77.8607,Pennsylvania
6,GU,13.444304,144.793731,Guam
7,IN,39.76691,-86.14996,Indiana
8,PR,18.220833,-66.590149,Puerto Rico
9,ID,43.68263,-114.36373,Idaho


In [12]:
# Load the gdp_state table into a DataFrame and preview the first ten rows.
gdp_state_df = pd.read_sql_table(table_name='gdp_state', con=engine)
gdp_state_df.head(n=10)

Unnamed: 0,ID,state_name,description,amount_2014,amount_2015,amount_2016,amount_2017,amount_2018,amount_2019
0,1,United States,GDP (Billions of Dollars),16932.051,17390.295,17680.274,18076.651,18609.078,19036.052
1,2,Alabama,GDP (Billions of Dollars),189.8863,191.3352,194.2838,196.9749,200.3726,203.4327
2,3,Alaska,GDP (Billions of Dollars),54.1882,54.7408,54.2466,54.2787,53.327,53.4338
3,4,Arizona,GDP (Billions of Dollars),276.9489,282.577,291.2752,303.6061,314.8275,325.3953
4,5,Arkansas,GDP (Billions of Dollars),111.7345,112.351,112.7981,113.8502,115.8852,117.1262
5,6,California,GDP (Billions of Dollars),2256.0547,2357.4529,2427.8946,2538.204,2644.0612,2729.2258
6,7,Colorado,GDP (Billions of Dollars),298.6553,312.4097,318.9534,329.9133,342.7332,358.4385
7,8,Connecticut,GDP (Billions of Dollars),235.7809,242.7065,243.2867,247.0359,249.0748,251.5682
8,9,Delaware,GDP (Billions of Dollars),64.9401,66.7935,63.0013,60.3575,61.4251,64.1435
9,10,District of Columbia,GDP (Billions of Dollars),114.5536,116.8084,119.6443,120.7594,123.6805,124.5972


In [37]:
# Left-join the state coordinates onto the GDP table by state name, so every
# GDP row is kept even when no coordinates match.
location_gdp_df = gdp_state_df.merge(merged_df, how='left', on='state_name')

In [54]:
# Keep one row per state name and discard GDP rows that found no matching
# state in the left merge (for example the "United States" aggregate row of
# gdp_state): those rows have no state_code/latitude/longitude and cannot be
# placed on a map. Without this step the result depends on a clean-up that is
# no longer part of the notebook.
location_gdp_df = (
    location_gdp_df
    .drop_duplicates(subset=['state_name'])
    .dropna(subset=['state_code', 'latitude', 'longitude'])
    .reset_index(drop=True)
)

In [86]:
decimals = 2  # decimal places kept for every GDP amount

# Round each yearly GDP column (amount_2014 ... amount_2019) in turn.
for gdp_year in range(2014, 2020):
    amount_column = f'amount_{gdp_year}'
    location_gdp_df[amount_column] = location_gdp_df[amount_column].round(decimals)

location_gdp_df

Unnamed: 0,ID,state_name,description,amount_2014,amount_2015,amount_2016,amount_2017,amount_2018,amount_2019,state_code,latitude,longitude
0,2,Alabama,GDP (Billions of Dollars),189.89,191.34,194.28,196.97,200.37,203.43,AL,32.840571,-86.631861
1,3,Alaska,GDP (Billions of Dollars),54.19,54.74,54.25,54.28,53.33,53.43,AK,64.84508,-147.722059
2,4,Arizona,GDP (Billions of Dollars),276.95,282.58,291.28,303.61,314.83,325.4,AZ,34.86597,-111.763811
3,5,Arkansas,GDP (Billions of Dollars),111.73,112.35,112.8,113.85,115.89,117.13,AR,34.74865,-92.274491
4,6,California,GDP (Billions of Dollars),2256.05,2357.45,2427.89,2538.2,2644.06,2729.23,CA,37.63864,-121.0
5,7,Colorado,GDP (Billions of Dollars),298.66,312.41,318.95,329.91,342.73,358.44,CO,38.843841,-106.133611
6,8,Connecticut,GDP (Billions of Dollars),235.78,242.71,243.29,247.04,249.07,251.57,CT,41.562661,-72.649841
7,9,Delaware,GDP (Billions of Dollars),64.94,66.79,63.0,60.36,61.43,64.14,DE,39.008831,-75.577741
8,10,District of Columbia,GDP (Billions of Dollars),114.55,116.81,119.64,120.76,123.68,124.6,DC,38.890371,-77.031961
9,11,Florida,GDP (Billions of Dollars),817.23,852.24,881.54,912.69,941.63,965.67,FL,28.93204,-81.928961


In [57]:
# Write the GDP-with-coordinates table to CSV with a header row and without
# the DataFrame index.
# NOTE(review): presumably the clean_data/ directory must already exist - confirm.
location_gdp_df.to_csv("clean_data/location_gdp.csv", index=False, header=True)

In [58]:
# Preview the first ten rows of the obesity data before splitting it by year.
overweight_obesity_df.head(10)

Unnamed: 0,ID,date_year,state_code,response,break_out,break_out_category,sample_size,data_value,latitude,longitude
0,1,2019,DE,Obese (BMI 30.0 - 99.8),55-64,Age Group,299,40.1,39.008831,-75.577741
1,2,2019,KS,Obese (BMI 30.0 - 99.8),25-34,Age Group,386,34.3,38.34774,-98.200781
2,3,2019,LA,Obese (BMI 30.0 - 99.8),55-64,Age Group,395,41.9,31.312661,-92.44568
3,4,2019,KY,Overweight (BMI 25.0-29.9),35-44,Age Group,294,35.6,37.64597,-84.774971
4,5,2019,NY,Overweight (BMI 25.0-29.9),45-54,Age Group,713,37.2,42.827001,-75.54397
5,6,2019,PA,Obese (BMI 30.0 - 99.8),18-24,Age Group,94,23.3,40.79373,-77.8607
6,7,2019,GU,Obese (BMI 30.0 - 99.8),18-24,Age Group,39,22.2,13.444304,144.793731
7,8,2014,GU,Obese (BMI 30.0 - 99.8),25-34,Age Group,148,34.6,13.444304,144.793731
8,9,2014,IN,Overweight (BMI 25.0-29.9),25-34,Age Group,249,32.8,39.76691,-86.14996
9,10,2014,PR,Overweight (BMI 25.0-29.9),45-54,Age Group,401,42.7,18.220833,-66.590149


In [89]:
# Store the survey year as text so it is compared as a label, not a number.
overweight_obesity_df['date_year'] = overweight_obesity_df['date_year'].astype(str)

# Split the data into one subset per survey year, 2014 through 2019.
survey_year = overweight_obesity_df['date_year']
df_2014 = overweight_obesity_df.loc[survey_year == '2014']
df_2015 = overweight_obesity_df.loc[survey_year == '2015']
df_2016 = overweight_obesity_df.loc[survey_year == '2016']
df_2017 = overweight_obesity_df.loc[survey_year == '2017']
df_2018 = overweight_obesity_df.loc[survey_year == '2018']
df_2019 = overweight_obesity_df.loc[survey_year == '2019']



In [96]:
# Mean 'data_value' per state and break_out group, computed separately for
# each year. Latitude/longitude are part of the key only so they are carried
# into the result. 'response' is not a key, so all response rows of a group
# are averaged together.
break_out_keys = ['state_code', 'break_out', 'latitude', 'longitude']
average_data_value_2014 = df_2014.groupby(by=break_out_keys)['data_value'].agg('mean').reset_index()
average_data_value_2015 = df_2015.groupby(by=break_out_keys)['data_value'].agg('mean').reset_index()
average_data_value_2016 = df_2016.groupby(by=break_out_keys)['data_value'].agg('mean').reset_index()
average_data_value_2017 = df_2017.groupby(by=break_out_keys)['data_value'].agg('mean').reset_index()
average_data_value_2018 = df_2018.groupby(by=break_out_keys)['data_value'].agg('mean').reset_index()
average_data_value_2019 = df_2019.groupby(by=break_out_keys)['data_value'].agg('mean').reset_index()


Unnamed: 0,state_code,break_out,latitude,longitude,data_value
0,AK,18-24,64.845080,-147.722059,21.90
1,AK,25-34,64.845080,-147.722059,29.10
2,AK,35-44,64.845080,-147.722059,34.75
3,AK,45-54,64.845080,-147.722059,35.50
4,AK,55-64,64.845080,-147.722059,36.60
...,...,...,...,...,...
313,WY,25-34,43.235541,-108.109830,29.65
314,WY,35-44,43.235541,-108.109830,37.90
315,WY,45-54,43.235541,-108.109830,34.50
316,WY,55-64,43.235541,-108.109830,35.50


In [97]:
# Group by 'state_code' and calculate the overall average 'data_value'
overall_average_data_value_2014 = average_data_value_2014.groupby(['state_code','latitude', 'longitude'])['data_value'].mean().reset_index()
overall_average_data_value_2015 = average_data_value_2015.groupby(['state_code','latitude', 'longitude'])['data_value'].mean().reset_index()
overall_average_data_value_2016 = average_data_value_2016.groupby(['state_code','latitude', 'longitude'])['data_value'].mean().reset_index()
overall_average_data_value_2017 = average_data_value_2017.groupby(['state_code','latitude', 'longitude'])['data_value'].mean().reset_index()
overall_average_data_value_2018 = average_data_value_2018.groupby(['state_code','latitude', 'longitude'])['data_value'].mean().reset_index()
overall_average_data_value_2019 = average_data_value_2019.groupby(['state_code','latitude', 'longitude'])['data_value'].mean().reset_index()


In [69]:
import folium
import fiona
import geopandas as gpd
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
import math

2014 MAP

In [91]:
# Base map centred on the United States.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2014 GDP.
# Coordinates and GDP are read from the SAME row of location_gdp_df. Pairing
# row i of location_gdp_df with row i of the obesity averages mismatches
# states, because the two frames are ordered differently and can differ in
# length. Rows lacking coordinates or a GDP value cannot be drawn and are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2014'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2014: {gdp_row['amount_2014']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2014']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

m

In [88]:
# Heat-map input for 2014: one [latitude, longitude, weight] triple per row of
# overall_average_data_value_2014, weighted by the mean 'data_value'.
lats_longs_weight = list(map(list, zip(overall_average_data_value_2014["latitude"],
                          overall_average_data_value_2014["longitude"],
                          overall_average_data_value_2014["data_value"]
                         )
               )
           )

# Rebuild the base map so this cell renders on its own.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2014 GDP. This
# section is the 2014 map, so it must use amount_2014 (it previously plotted
# the 2015 figures). Coordinates and GDP come from the same row of
# location_gdp_df so each bubble sits on its own state; rows lacking
# coordinates or a GDP value are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2014'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2014: {gdp_row['amount_2014']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2014']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

# Heat layer, wrapped in a named feature group, drawn over the GDP bubbles.
fg = folium.FeatureGroup(name="Obesity HeatMap")
fg.add_child(HeatMap(lats_longs_weight))
m.add_child(fg)
m


2015 MAP

In [98]:
# Base map centred on the United States.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2015 GDP.
# Coordinates and GDP are read from the SAME row of location_gdp_df. Pairing
# row i of location_gdp_df with row i of the obesity averages mismatches
# states, because the two frames are ordered differently and can differ in
# length. Rows lacking coordinates or a GDP value cannot be drawn and are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2015'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2015: {gdp_row['amount_2015']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2015']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

m

In [99]:
# Heat-map input for 2015: one [latitude, longitude, weight] triple per row of
# overall_average_data_value_2015, weighted by the mean 'data_value'.
lats_longs_weight = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        overall_average_data_value_2015["latitude"],
        overall_average_data_value_2015["longitude"],
        overall_average_data_value_2015["data_value"],
    )
]

# Put the heat layer in a named feature group and add it to the existing map `m`.
fg = folium.FeatureGroup(name="Obesity HeatMap")
HeatMap(lats_longs_weight).add_to(fg)
fg.add_to(m)
m

2016 MAP

In [102]:
# Base map centred on the United States.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2016 GDP.
# Coordinates and GDP are read from the SAME row of location_gdp_df. Pairing
# row i of location_gdp_df with row i of the obesity averages mismatches
# states, because the two frames are ordered differently and can differ in
# length. Rows lacking coordinates or a GDP value cannot be drawn and are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2016'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2016: {gdp_row['amount_2016']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2016']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

m

In [101]:
# Heat-map input for 2016: one [latitude, longitude, weight] triple per row of
# overall_average_data_value_2016, weighted by the mean 'data_value'.
lats_longs_weight = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        overall_average_data_value_2016["latitude"],
        overall_average_data_value_2016["longitude"],
        overall_average_data_value_2016["data_value"],
    )
]

# Put the heat layer in a named feature group and add it to the existing map `m`.
fg = folium.FeatureGroup(name="Obesity HeatMap")
HeatMap(lats_longs_weight).add_to(fg)
fg.add_to(m)
m

2017 MAP

In [103]:
# Base map centred on the United States.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2017 GDP.
# Coordinates and GDP are read from the SAME row of location_gdp_df. Pairing
# row i of location_gdp_df with row i of the obesity averages mismatches
# states, because the two frames are ordered differently and can differ in
# length. Rows lacking coordinates or a GDP value cannot be drawn and are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2017'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2017: {gdp_row['amount_2017']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2017']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

m

In [104]:
# Heat-map input for 2017: one [latitude, longitude, weight] triple per row of
# overall_average_data_value_2017, weighted by the mean 'data_value'.
lats_longs_weight = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        overall_average_data_value_2017["latitude"],
        overall_average_data_value_2017["longitude"],
        overall_average_data_value_2017["data_value"],
    )
]

# Put the heat layer in a named feature group and add it to the existing map `m`.
fg = folium.FeatureGroup(name="Obesity HeatMap")
HeatMap(lats_longs_weight).add_to(fg)
fg.add_to(m)
m

2018 MAP

In [105]:
# Base map centred on the United States.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2018 GDP.
# Coordinates and GDP are read from the SAME row of location_gdp_df. Pairing
# row i of location_gdp_df with row i of the obesity averages mismatches
# states, because the two frames are ordered differently and can differ in
# length. Rows lacking coordinates or a GDP value cannot be drawn and are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2018'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2018: {gdp_row['amount_2018']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2018']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

m

In [106]:
# Heat-map input for 2018: one [latitude, longitude, weight] triple per row of
# overall_average_data_value_2018, weighted by the mean 'data_value'.
lats_longs_weight = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        overall_average_data_value_2018["latitude"],
        overall_average_data_value_2018["longitude"],
        overall_average_data_value_2018["data_value"],
    )
]

# Put the heat layer in a named feature group and add it to the existing map `m`.
fg = folium.FeatureGroup(name="Obesity HeatMap")
HeatMap(lats_longs_weight).add_to(fg)
fg.add_to(m)
m

2019 MAP

In [109]:
# Base map centred on the United States.
m = folium.Map(location=[38.27312, -98.5821872], zoom_start=4)

# Bubble layer: one circle per state, sized by that state's 2019 GDP.
# Coordinates and GDP are read from the SAME row of location_gdp_df. Pairing
# row i of location_gdp_df with row i of the obesity averages mismatches
# states, because the two frames are ordered differently and can differ in
# length. Rows lacking coordinates or a GDP value cannot be drawn and are skipped.
gdp_points = location_gdp_df.dropna(subset=['latitude', 'longitude', 'amount_2019'])
for _, gdp_row in gdp_points.iterrows():
    folium.Circle(
        location=[float(gdp_row['latitude']), float(gdp_row['longitude'])],
        popup=f"GDP 2019: {gdp_row['amount_2019']} (Billions of Dollars)",
        # Circle radius is in metres; GDP (billions) is scaled by 200 for visibility.
        radius=float(gdp_row['amount_2019']) * 200,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

m

In [110]:
# Heat-map input for 2019: one [latitude, longitude, weight] triple per row of
# overall_average_data_value_2019, weighted by the mean 'data_value'.
lats_longs_weight = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        overall_average_data_value_2019["latitude"],
        overall_average_data_value_2019["longitude"],
        overall_average_data_value_2019["data_value"],
    )
]

# Put the heat layer in a named feature group and add it to the existing map `m`.
fg = folium.FeatureGroup(name="Obesity HeatMap")
HeatMap(lats_longs_weight).add_to(fg)
fg.add_to(m)
m

The Average GDP and Obesity rates