# Data Gathering, Cleaning, and Exploration

In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from scipy import stats
#Install cursor dependencies
import  mplcursors
%matplotlib nbagg

# Census API Key and config
from config import api_key
c = Census(api_key, year=2018)

# Dictionary of state abbreviations
from us_state_abbrev import us_state_abbrev

ImportError: cannot import name 'api_key' from 'config' (C:\Users\14043\Downloads\Newfolder\lib\site-packages\config\__init__.py)

## Census Data

In [None]:
# Query the ACS 5-year endpoint for every county.
# Library documentation: https://github.com/CommerceDataService/census-wrapper
# Variable labels:       https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b
# NOTE(review): the vintage is whatever the `Census(api_key, year=2018)` client was
# built with -- confirm 2018 is the intended year.
acs_fields = ("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B19301_001E")
census_data = c.acs5.get(acs_fields, {'for': 'county:*'})

# Readable column names for the raw ACS variable codes
readable_names = {
    "NAME": "Name",
    "county": "County",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
}

# Build the frame and rename in one pass
census_pd = pd.DataFrame(census_data).rename(columns=readable_names)




In [None]:
# Preview of the census columns kept for the analysis
census = census_pd.loc[:, ["County", "Name", "Population", "Median Age", "Household Income"]].copy()
census

In [None]:
# Final census frame

# Keep the columns of interest and persist them before any reshaping
census_columns = ["County", "Name", "Population", "Median Age", "Household Income"]
census = census_pd[census_columns].copy()
census.to_csv("resources/census_data.csv", encoding="utf-8", index=False)

# Build the merge key from the full name with the " County" / " Parish" suffix removed,
# then drop the source columns and index by that key
census['county'] = census['Name'].str.replace(' County', '').str.replace(' Parish', '')
census = census.drop(['County', 'Name'], axis=1).set_index('county')
census

In [None]:
# Summary statistics for the census frame built above
census.describe()

## Covid Data

In [None]:
# Load the covid records, fold state into the county column so it can serve as the
# merge key, and discard the columns that are not analysed
covid = pd.read_csv('resources/covid-statistics.csv')
covid['county'] = covid['county'].str.cat(covid['state'], sep=', ')
covid = covid.drop(columns=['state', 'fips', 'date'])
covid

In [None]:
# Cases and deaths for one county, plotted in row order
# (the date column was dropped above, so the row position stands in for time)
fulton = covid.loc[covid['county'] == 'Fulton, Georgia']
xs = range(len(fulton))

# One panel per series, side by side
for position, (column, heading) in enumerate([('cases', 'Cases'), ('deaths', 'Deaths')], start=1):
    plt.subplot(1, 2, position)
    plt.plot(xs, fulton[column])
    plt.title(heading)

plt.tight_layout()
plt.show()

In [None]:
# Per-county maximum of every remaining column (the final cumulative cases and deaths),
# keeping only counties with complete data
covid_total = covid.groupby('county').max().dropna()
covid_total

In [None]:
# Summary statistics for the per-county covid totals
covid_total.describe()

## General Health Data

In [None]:
# Defining function for cleaning up data from chronicdata.cdc.gov
def makehealthcsv(x, colname, boolean_whether_you_want_latitude, state_names=None):
    """Reduce a raw health-indicator frame to one row per county, indexed by county.

    Parameters
    ----------
    x : pandas.DataFrame
        Raw frame. The columns read are 'Stratification1', 'Stratification2',
        'LocationAbbr', 'LocationDesc', 'Data_Value' and, when latitude is
        requested, 'Y_lat' and 'X_lon'.
    colname : str
        Name given to the 'Data_Value' column in the result.
    boolean_whether_you_want_latitude : bool
        When truthy, 'Y_lat' and 'X_lon' are kept alongside the value column.
    state_names : mapping, optional
        Maps the values found in 'LocationAbbr' to the state text appended to
        the county name. Defaults to the notebook-level `us_state_abbrev`.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'county' ("<LocationDesc>, <state>" with " County" and
        " Parish" removed), rows with missing values dropped, first occurrence
        kept for duplicated county keys. The input frame is not modified.
    """
    if state_names is None:
        state_names = us_state_abbrev

    # Keep only the un-stratified ("Overall" / "Overall") rows
    is_overall = (x['Stratification1'] == 'Overall') & (x['Stratification2'] == 'Overall')
    overall = x[is_overall]

    # Keep rows whose state code is a key of the mapping. The .copy() makes the
    # column assignment below write to an independent frame instead of a slice
    # (which triggers SettingWithCopyWarning).
    overall = overall[overall['LocationAbbr'].isin(list(state_names))].copy()

    # County key consistent with the other dataframes. regex=False pins literal
    # matching regardless of the pandas version's default.
    overall['county'] = (
        (overall['LocationDesc'] + ', ' + overall['LocationAbbr'].map(state_names))
        .str.replace(' County', '', regex=False)
        .str.replace(' Parish', '', regex=False)
    )

    keep = ['county', 'Data_Value']
    if boolean_whether_you_want_latitude:
        keep += ['Y_lat', 'X_lon']

    # Drop missing data, rename the column of interest, de-duplicate, index by county
    return (
        overall[keep]
        .dropna()
        .rename(columns={'Data_Value': colname})
        .drop_duplicates('county')
        .set_index('county')
    )

In [None]:
# Heart disease data by county; the trailing True keeps the Y_lat / X_lon columns
heart_disease = makehealthcsv(pd.read_csv("resources/Heart-Disease-by-County.csv"), 'Heart Disease', True)
heart_disease

In [None]:
# Summary statistics for the heart disease frame (value column plus latitude/longitude)
heart_disease.describe()

In [None]:
# Stroke data by county; the trailing False leaves latitude/longitude out
stroke = makehealthcsv(pd.read_csv("resources/Stroke-by-County.csv"), 'Stroke', False)
stroke

In [None]:
# Summary statistics for the stroke frame
stroke.describe()

## Vaccination Data

In [None]:
# Read the raw vaccination csv (cleaned in the next cell)
vaccine_data = pd.read_csv('resources/vaccine_data.csv')
vaccine_data

In [None]:
# Extract the highest recorded Series_Complete_Pop_Pct per (county, state)
vaccine = (
    vaccine_data
    .groupby(['Recip_County', 'Recip_State'])['Series_Complete_Pop_Pct']
    .max()
    .reset_index()
)

# Keep rows whose state code is a key of us_state_abbrev. The .copy() makes the
# column assignments below write to an independent frame rather than a slice
# (avoids SettingWithCopyWarning).
vaccine = vaccine[vaccine["Recip_State"].isin(list(us_state_abbrev))].copy()

# Get full county name in format consistent with other dataframes.
# regex=False pins literal matching regardless of the pandas version's default.
vaccine['county'] = vaccine['Recip_County'] + ', ' + vaccine['Recip_State'].map(us_state_abbrev)
vaccine['county'] = (
    vaccine['county']
    .str.replace(' County', '', regex=False)
    .str.replace(' Parish', '', regex=False)
)

# Drop NaNs, rename column, set index for merging
vaccine = (
    vaccine[['county', 'Series_Complete_Pop_Pct']]
    .dropna()
    .rename(columns={'Series_Complete_Pop_Pct': "Vaccination Rate"})
    .set_index('county')
)
vaccine

In [None]:
# Summary statistics for the vaccination rate frame
vaccine.describe()

## Election Data

In [None]:
# Read csv and keep only the 2020 election rows.
# The .copy() makes the column assignment below write to an independent frame
# rather than a filtered slice (avoids SettingWithCopyWarning).
party = pd.read_csv('resources/election_data.csv')
party = party[party['year'] == 2020].copy()

# Combine county and state names into a single column
# NOTE(review): .str.title() capitalises only the first letter of each word, so
# names such as "McDuffie" or "District of Columbia" may not match the spelling
# used by the other sources and would be lost in the inner join -- confirm.
party['county'] = party['county_name'].str.title() + ', ' + party['state'].str.title()
party = party.drop(columns=['state', 'state_po', 'county_name', 'county_fips'])
party

In [None]:
# For each county, add up votes per party and determine the winning party.
#
# The totals are aggregated with groupby instead of walking the rows, so the
# result no longer depends on a county's rows being contiguous, a county that
# appears only on the last row is still recorded, and no winner can be carried
# over from a previous county.

# Total votes per (county, party); sort=False keeps first-appearance order so a
# tie resolves to the party seen first
county_party_votes = (
    party
    .groupby(['county', 'party'], sort=False, as_index=False)['candidatevotes']
    .sum()
)

# Row with the most votes inside each county
top_rows = county_party_votes.loc[
    county_party_votes.groupby('county', sort=False)['candidatevotes'].idxmax()
]

# county -> winning party, consumed by the next cell
winners = dict(zip(top_rows['county'], top_rows['party']))

In [None]:
# Overwrite each row's party with the winner of that row's county
party['party'] = party['county'].map(lambda county_name: winners[county_name])

# Collapse to one value per county; every row of a county now carries the same
# party, so max() simply returns it
party_df = party.groupby('county')['party'].max()

display(party_df)
print("Totals:")
display(party_df.value_counts())

## Combined Data

In [None]:
# Inner-join all per-county frames on their shared "county" index, drop incomplete
# rows, and turn the index back into a column
frames = [covid_total, vaccine, census, heart_disease, stroke, party_df]
merged_df = pd.concat(frames, join='inner', axis=1).dropna().reset_index()

# Split "<county>, <state>" into two columns for later analysis
merged_df[['County', 'State']] = merged_df['county'].str.split(", ", expand=True)
merged_df = merged_df.drop(columns=['county'])

# Cases and deaths relative to population
merged_df['Cases per Capita'] = merged_df['cases'] / merged_df['Population']
merged_df['Deaths per Capita'] = merged_df['deaths'] / merged_df['Population']

# Tidy the column names and save the complete data set
merged_df = merged_df.rename(columns={'cases': 'Cases', 'deaths': 'Deaths', 'party': 'Party'})
merged_df.to_csv('resources/full_data.csv', index=False)

# Keep counties with a population from 20,000 to 200,000 (both ends included) and
# give the coordinate columns (added by Carla 02/03/2022) descriptive names.
# The filtered frame is not written to disk here; only full_data.csv is saved.
mid_sized = merged_df['Population'].between(20000, 200000)
merged_df = merged_df[mid_sized].rename(columns={'Y_lat': "Y_Latitude", 'X_lon': "X_Longitude"})

merged_df

In [None]:
# Correlation matrix over the numeric columns only.
# merged_df also holds text columns (County, State, Party); recent pandas versions
# raise on those instead of silently skipping them, so select numeric columns first.
corr = merged_df.select_dtypes(include='number').corr()
corr.style.background_gradient(cmap='coolwarm', axis=None)

In [None]:
#Sorting through states and relegating to correct region
# Each region's state list is defined once and used both as the filter and as the
# label list, so the two cannot drift apart (the Southeast labels previously
# contained the misspelling "Arkanas").

#labels
West = ["Washington","Oregon","California","Idaho","Nevada","Utah","Arizona","Alaska","Hawaii"]
Plains = ["Montana","North Dakota","South Dakota","Wyoming","Nebraska","Colorado","Oklahoma","Kansas","Texas","New Mexico"]
Midwest = ["Minnesota","Wisconsin","Michigan","Iowa","Illinois","Indiana","Ohio","Missouri","Kentucky"]
Southeast = ["Arkansas","Louisiana","Tennessee","Mississippi","Alabama","Georgia","Florida","North Carolina","South Carolina"]
Northeast = ["Maine","New Hampshire","Vermont","New York","Massachusetts","Connecticut","Rhode Island",
             "New Jersey","Pennsylvania","Delaware","West Virginia","Virginia","Maryland"]

#West Region
West_df = merged_df.loc[merged_df["State"].isin(West), :]

#Plains Region
Plains_df = merged_df.loc[merged_df["State"].isin(Plains), :]

#Midwest Region
Midwest_df = merged_df.loc[merged_df["State"].isin(Midwest), :]

#Southeast Region
Southeast_df = merged_df.loc[merged_df["State"].isin(Southeast), :]

#Northeast Region of US
Northeast_df = merged_df.loc[merged_df["State"].isin(Northeast), :]

In [None]:
#Variables
West_co = West_df["County"]
West_pop = West_df["Population"]
West_cases = West_df["Cases"]
West_heart = West_df["Heart Disease"]
West_cap = West_df["Deaths per Capita"]

# State as a categorical: the integer codes colour the markers and the categories
# (in the same sorted order as the codes) label the legend. Taking the labels from
# the categories keeps every colour matched to its state and only lists states
# that are actually present in the data.
West_states = West_df["State"].astype('category')
West_state_labels = list(West_states.cat.categories)

#Plot Graph
plt.figure(figsize=(10,8))
plt.ylabel("Population")
Scatter = plt.scatter(West_co, West_pop, s=100, c=West_states.cat.codes)
mplcursors.cursor(Scatter, hover=True)
# num=None asks for one legend handle per distinct colour value
plt.legend(loc="lower center", bbox_to_anchor=(.50, -0.12), ncol=4,
           handles=Scatter.legend_elements(num=None)[0],
           labels=West_state_labels,
           title="State")
Scatter.axes.get_xaxis().set_visible(False)
plt.title("United States Region: West | County versus Population")

#Deaths per capita
#Plot Graph
plt.figure(figsize=(10,8))
plt.xlabel("Cases of Covid")
plt.ylabel("Deaths per Capita")
# Marker area scales with the heart disease value
Scatter2 = plt.scatter(West_cases, West_cap, s=West_heart*.3, c=West_states.cat.codes)
#Hover Event
mplcursors.cursor(Scatter2, hover=True)
plt.legend(loc="upper right", ncol=2,
           handles=Scatter2.legend_elements(num=None)[0],
           labels=West_state_labels,
           title="State")
plt.title("Region: West | Deaths per Capita versus Covid Cases in Relation to Heart Disease")
print("This graph shows the deaths per capita and covid cases recorded by each county. The marker point sizes vary due to the number of instances of heart disease recorded.")

In [None]:
#Variables
Plains_co = Plains_df["County"]
Plains_pop = Plains_df["Population"]
Plains_cases = Plains_df["Cases"]
Plains_heart = Plains_df["Heart Disease"]
Plains_cap = Plains_df["Deaths per Capita"]

# State as a categorical: the integer codes colour the markers and the categories
# (in the same sorted order as the codes) label the legend. Taking the labels from
# the categories keeps every colour matched to its state and only lists states
# that are actually present in the data.
Plains_states = Plains_df["State"].astype('category')
Plains_state_labels = list(Plains_states.cat.categories)

#Plot Graph for County and population size
plt.figure(figsize=(8,8))
plt.ylabel("Population")
Scatter = plt.scatter(Plains_co, Plains_pop, s=100, c=Plains_states.cat.codes)
Scatter.axes.get_xaxis().set_visible(False)
mplcursors.cursor(Scatter, hover=True)
plt.title("United States Region: Plains | County versus Population")
# num=None asks for one legend handle per distinct colour value (the default
# may return fewer handles than states for a region this large)
plt.legend(loc="lower center", bbox_to_anchor=(.50, -0.15), ncol=4,
           handles=Scatter.legend_elements(num=None)[0],
           labels=Plains_state_labels,
           title="State")

#Plot Graph
plt.figure(figsize=(10,8))
plt.xlabel("Cases of Covid")
plt.ylabel("Deaths per Capita")
# Marker area scales with the heart disease value
Scatter2 = plt.scatter(Plains_cases, Plains_cap, s=Plains_heart*.3, c=Plains_states.cat.codes)
#Hover Event
mplcursors.cursor(Scatter2, hover=True)
# Labels are the Plains states (this legend previously reused the West list)
plt.legend(loc="upper right", ncol=2,
           handles=Scatter2.legend_elements(num=None)[0],
           labels=Plains_state_labels,
           title="State")
plt.title("Region: Plains | Deaths per Capita versus Covid Cases in Relation to Heart Disease")
print("This graph shows the deaths per capita and covid cases recorded by each county. The marker point sizes vary due to the number of instances of heart disease recorded.")

In [None]:
#Variables
Midwest_co = Midwest_df["County"]
Midwest_pop = Midwest_df["Population"]
Midwest_cases = Midwest_df["Cases"]
Midwest_heart = Midwest_df["Heart Disease"]
Midwest_cap = Midwest_df["Deaths per Capita"]

# State as a categorical: the integer codes colour the markers and the categories
# (in the same sorted order as the codes) label the legend. Taking the labels from
# the categories keeps every colour matched to its state and only lists states
# that are actually present in the data.
Midwest_states = Midwest_df["State"].astype('category')
Midwest_state_labels = list(Midwest_states.cat.categories)

#Plot Graph
plt.figure(figsize=(10,8))
plt.ylabel("Population")
Scatter = plt.scatter(Midwest_co, Midwest_pop, s=100, c=Midwest_states.cat.codes)
mplcursors.cursor(Scatter, hover=True)
# num=None asks for one legend handle per distinct colour value
plt.legend(loc="lower center", bbox_to_anchor=(.50, -0.15), ncol=4,
           handles=Scatter.legend_elements(num=None)[0],
           labels=Midwest_state_labels,
           title="State")
Scatter.axes.get_xaxis().set_visible(False)
plt.title("United States Region: Midwest | County versus Population")

#Plot Graph
plt.figure(figsize=(10,8))
plt.xlabel("Cases of Covid")
plt.ylabel("Deaths per Capita")
# Marker area scales with the heart disease value
Scatter2 = plt.scatter(Midwest_cases, Midwest_cap, s=Midwest_heart*.3, c=Midwest_states.cat.codes)
#Hover Event
mplcursors.cursor(Scatter2, hover=True)
plt.legend(loc="upper right", ncol=2,
           handles=Scatter2.legend_elements(num=None)[0],
           labels=Midwest_state_labels,
           title="State")
plt.title("Region: Midwest | Deaths per Capita versus Covid Cases in Relation to Heart Disease")
print("This graph shows the deaths per capita and covid cases recorded by each county. The marker point sizes vary due to the number of instances of heart disease recorded.")

In [None]:
#Variables
Southeast_co = Southeast_df["County"]
Southeast_pop = Southeast_df["Population"]
Southeast_cases = Southeast_df["Cases"]
Southeast_heart = Southeast_df["Heart Disease"]
Southeast_cap = Southeast_df["Deaths per Capita"]

# State as a categorical: the integer codes colour the markers and the categories
# (in the same sorted order as the codes) label the legend. Taking the labels from
# the categories keeps every colour matched to its state, only lists states that
# are actually present, and avoids the misspelled "Arkanas" entry of the
# hand-written label list.
Southeast_states = Southeast_df["State"].astype('category')
Southeast_state_labels = list(Southeast_states.cat.categories)

#Plot Graph
plt.figure(figsize=(10,8))
plt.ylabel("Population")
Scatter = plt.scatter(Southeast_co, Southeast_pop, s=100, c=Southeast_states.cat.codes)
mplcursors.cursor(Scatter, hover=True)
# num=None asks for one legend handle per distinct colour value
plt.legend(loc="lower center", bbox_to_anchor=(.50, -0.15), ncol=4,
           handles=Scatter.legend_elements(num=None)[0],
           labels=Southeast_state_labels,
           title="State")
Scatter.axes.get_xaxis().set_visible(False)
plt.title("United States Region: Southeast | County versus Population")

#Plot Graph
plt.figure(figsize=(10,8))
plt.xlabel("Cases of Covid")
plt.ylabel("Deaths per Capita")
# Marker area scales with the heart disease value
Scatter2 = plt.scatter(Southeast_cases, Southeast_cap, s=Southeast_heart*.3, c=Southeast_states.cat.codes)
#Hover Event
mplcursors.cursor(Scatter2, hover=True)
plt.legend(loc="upper right", ncol=2,
           handles=Scatter2.legend_elements(num=None)[0],
           labels=Southeast_state_labels,
           title="State")
plt.title("Region: Southeast | Deaths per Capita versus Covid Cases in Relation to Heart Disease")
print("This graph shows the deaths per capita and covid cases recorded by each county. The marker point sizes vary due to the number of instances of heart disease recorded.")

In [None]:
#Variables
Northeast_co = Northeast_df["County"]
Northeast_pop = Northeast_df["Population"]
Northeast_cases = Northeast_df["Cases"]
Northeast_heart = Northeast_df["Heart Disease"]
Northeast_cap = Northeast_df["Deaths per Capita"]

# State as a categorical: the integer codes colour the markers and the categories
# (in the same sorted order as the codes) label the legend. Taking the labels from
# the categories keeps every colour matched to its state and only lists states
# that are actually present in the data.
Northeast_states = Northeast_df["State"].astype('category')
Northeast_state_labels = list(Northeast_states.cat.categories)

#Plot Graph
plt.figure(figsize=(10,8))
plt.ylabel("Population")
Scatter = plt.scatter(Northeast_co, Northeast_pop, s=100, c=Northeast_states.cat.codes)
#Hover Event
mplcursors.cursor(Scatter, hover=True)
# num=None asks for one legend handle per distinct colour value (the default
# may return fewer handles than states for a region this large)
plt.legend(loc="lower center", bbox_to_anchor=(.50, -0.15), ncol=4,
           handles=Scatter.legend_elements(num=None)[0],
           labels=Northeast_state_labels,
           title="State")
Scatter.axes.get_xaxis().set_visible(False)
plt.title("United States Region: Northeast | County versus Population")

#Plot Graph
plt.figure(figsize=(10,8))
plt.xlabel("Cases of Covid")
plt.ylabel("Deaths per Capita")
# Marker area scales with the heart disease value
# NOTE(review): this region uses a .5 scale factor where the other regions use .3 -- confirm intended
Scatter2 = plt.scatter(Northeast_cases, Northeast_cap, s=Northeast_heart*.5, c=Northeast_states.cat.codes)
#Hover Event
mplcursors.cursor(Scatter2, hover=True)
plt.legend(loc="upper right", ncol=2,
           handles=Scatter2.legend_elements(num=None)[0],
           labels=Northeast_state_labels,
           title="State")
plt.title("Region: Northeast | Deaths per Capita versus Covid Cases in Relation to Heart Disease")
print("This graph shows the deaths per capita and covid cases recorded by each county. The marker point sizes vary due to the number of instances of heart disease recorded.")

In [None]:
# Compare deaths per capita between counties won by each party
reds = merged_df.loc[merged_df['Party'] == 'REPUBLICAN', 'Deaths per Capita']
blues = merged_df.loc[merged_df['Party'] == 'DEMOCRAT', 'Deaths per Capita']

# Draw on a dedicated figure: a bare plt.boxplot would reuse whichever figure is
# still current from an earlier cell under the interactive backend
fig, ax = plt.subplots()
ax.boxplot([reds, blues], labels=['RED', 'BLUE'])
ax.set_ylabel('Deaths per Capita')
ax.set_title('Deaths per Capita by Winning Party')
plt.show()

# Independent two-sample t-test between the two groups (scipy defaults)
stats.ttest_ind(reds, blues)

In [None]:
#Takes axis and col names, r=True shows the R-squared of the fit in a legend, xunit/yunit add unit to labels
def makescatter(ax, col1, col2, r=False, xunit='', yunit='', data=None):
    """Scatter `col1` against `col2` on `ax` and overlay a least-squares line.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    col1, col2 : str
        Column names used for the x and y values respectively.
    r : bool, default False
        When true, show a legend holding the fit's R-squared (the squared
        correlation coefficient returned by `stats.linregress`).
    xunit, yunit : str, default ''
        Optional units appended to the axis labels as " (<unit>)".
    data : pandas.DataFrame, optional
        Frame holding the columns. Defaults to the notebook-level `merged_df`.

    Returns
    -------
    None
    """
    if data is None:
        data = merged_df

    xs = data[col1]
    ys = data[col2]

    # Only slope, intercept and the correlation coefficient are needed
    slope, intercept, rvalue, _, _ = stats.linregress(xs, ys)
    fitted = slope * xs + intercept

    ax.scatter(xs, ys)
    # The label says what the number is, so the legend is readable on its own
    ax.plot(xs, fitted, 'r-', label=f'$R^2$ = {rvalue**2:.3f}')

    # Units are added to the labels (and therefore the title), not to the column lookups
    if xunit:
        col1 += f' ({xunit})'
    if yunit:
        col2 += f' ({yunit})'
    ax.set_xlabel(col1)
    ax.set_ylabel(col2)
    ax.set_title(col1 + ' versus ' + col2)

    if r:
        ax.legend()
    return

In [None]:
# Single scatter plot with the default options (no legend, no units)
fig, ax1 = plt.subplots(nrows=1, ncols=1)

makescatter(ax1, col1='Deaths per Capita', col2='Household Income')


In [None]:
# Single scatter plot on a 7x6 figure, with the legend switched on and a unit on the y label
fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(7,6))

makescatter(ax1, col1='Deaths per Capita', col2='Household Income', r=True, yunit='USD')


In [None]:
# Two scatter plots side by side on one 15x8 figure

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15,8))

# Left panel: deaths per capita against household income
makescatter(ax1, col1='Deaths per Capita', col2='Household Income', yunit='USD')

# Right panel: heart disease against deaths per capita, with the legend shown
makescatter(ax2, col1='Heart Disease', col2='Deaths per Capita', xunit='Per 100,000', r=True)


## Boxplots

## Boxplot for Deaths per Capita

In [None]:
# Split the rows in two by the first letter of the state name
state_initial = merged_df["State"].str[0]
first_half_states = merged_df[state_initial <= "M"]   # first letter up to and including "M"
second_half_states = merged_df[state_initial > "M"]   # first letter after "M"

In [None]:
def create_vertical_box_plot(figsize,title,df1,df2,column,by,ylabel_1,ylabel_2):
    """Draw two grouped box plots stacked vertically on one figure.

    Parameters
    ----------
    figsize : tuple
        Figure size passed to `plt.subplots` (previously ignored in favour of a
        hard-coded (15, 8)).
    title : str
        Bold title for the whole figure.
    df1, df2 : pandas.DataFrame
        Data for the upper and lower panel respectively.
    column : str
        Column whose distribution is plotted.
    by : str
        Column used to group the boxes.
    ylabel_1, ylabel_2 : str
        Text set as the title of the upper and lower panel respectively.

    Returns
    -------
    None
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize)

    # Same drawing steps for both panels; group labels rotated to stay readable
    for ax, frame, panel_title in ((ax1, df1, ylabel_1), (ax2, df2, ylabel_2)):
        frame.boxplot(ax=ax, column=column, by=by, rot=90)
        ax.set_title(panel_title, fontsize=12)

    fig.tight_layout(pad=3)  # separation between boxplots
    # Set last so it replaces any figure title set while the boxes were drawn
    fig.suptitle(title, fontsize=20, fontweight="bold")
    return

In [None]:
# Box plots of deaths per capita by state, using the helper defined above
create_vertical_box_plot(
    figsize=(20,10),
    title="Deaths per Capita by State",
    df1=first_half_states,
    df2=second_half_states,
    column="Deaths per Capita",
    by="State",
    ylabel_1="Deaths per Capita for States A though M",
    ylabel_2="Deaths per Capita for states N through Z",
)

## Boxplot for Cases per Capita

In [None]:
# Box plots of cases per capita by state, using the same helper
create_vertical_box_plot(
    figsize=(20,10),
    title="Cases per Capita by State",
    df1=first_half_states,
    df2=second_half_states,
    column="Cases per Capita",
    by="State",
    ylabel_1="Cases per Capita for States A through M",
    ylabel_2="Cases per Capita for States N through Z",
);

## Heat Maps

In [None]:
#import dependencies
import requests
import gmaps
import os

# Import API key
from api_keys import g_key
gmaps.configure(api_key = g_key)

In [None]:
# The five counties with the highest cases per capita:
# sort descending with a fresh 0..n-1 index, then keep the first five rows
sorted_df_by_cases = (
    merged_df
    .sort_values(by=["Cases per Capita"], ascending=False, ignore_index=True)
    .head(5)
)

# Coordinates of those counties, used as marker locations on the map
locations_sorted_df_by_cases = sorted_df_by_cases[["Y_Latitude", "X_Longitude"]]

# Show the selection
sorted_df_by_cases

In [None]:
# The five counties with the highest deaths per capita:
# sort descending with a fresh 0..n-1 index, then keep the first five rows
sorted_df_by_deaths = (
    merged_df
    .sort_values(by=["Deaths per Capita"], ascending=False, ignore_index=True)
    .head(5)
)

# Coordinates of those counties, used as symbol locations on the map
locations_sorted_df_by_deaths = sorted_df_by_deaths[["Y_Latitude", "X_Longitude"]]


# Show the selection
sorted_df_by_deaths

In [None]:
# HTML info-box template; the placeholders are filled from each row's columns
info_box_template_cases_per_capita = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>State</dt><dd>{State}</dd>
<dt>Cases per Capita</dt><dd>{Cases per Capita}</dd>
</dl>
"""

# One rendered info box per top-cases county, in row order
box_template_cases = [
    info_box_template_cases_per_capita.format(**county_row)
    for _, county_row in sorted_df_by_cases.iterrows()
]

In [None]:
# HTML info-box template; the placeholders are filled from each row's columns
info_box_template_deaths_per_capita = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>State</dt><dd>{State}</dd>
<dt>Deaths per Capita</dt><dd>{Deaths per Capita}</dd>
</dl>
"""

# One rendered info box per top-deaths county, in row order
box_template_deaths = [
    info_box_template_deaths_per_capita.format(**county_row)
    for _, county_row in sorted_df_by_deaths.iterrows()
]

In [None]:
def create_map(weights, max_intensity, locations_for_marker_layer, info_box_for_marker_layer, locations_for_symbol_layer, info_box_for_symbol_layer):
    """Build a gmaps figure with a heat map, a marker layer and a symbol layer.

    Parameters
    ----------
    weights :
        Per-location weights for the heat map; paired with the
        Y_Latitude / X_Longitude rows of the notebook-level `merged_df`.
    max_intensity :
        Passed to `gmaps.heatmap_layer` as `max_intensity`.
    locations_for_marker_layer :
        Locations of the markers drawn on top of the heat map.
    info_box_for_marker_layer :
        Info box content for the marker layer.
    locations_for_symbol_layer :
        Locations of the symbols.
    info_box_for_symbol_layer :
        Info box content for the symbol layer.

    Returns
    -------
    The gmaps figure holding the three layers.
    """
    figure = gmaps.figure()

    # Heat map over every county in merged_df
    locations = merged_df[["Y_Latitude", "X_Longitude"]]
    heat_layer = gmaps.heatmap_layer(locations, weights = weights, dissipating = False, max_intensity = max_intensity,point_radius = 0.3)
    figure.add_layer(heat_layer)

    # Marker layer on top of the heat map. Uses the locations passed in by the
    # caller (previously the global locations_sorted_df_by_cases was used and
    # the argument was ignored).
    markers = gmaps.marker_layer(locations_for_marker_layer, info_box_content = info_box_for_marker_layer)
    figure.add_layer(markers)

    # Symbol layer for the second set of highlighted locations
    symbol_layer = gmaps.symbol_layer(locations_for_symbol_layer, info_box_content=info_box_for_symbol_layer)
    figure.add_layer(symbol_layer)

    return figure

In [None]:
# Heart disease heat map with the top-cases counties as markers and the
# top-deaths counties as symbols
figure1 = create_map(
    weights=merged_df["Heart Disease"],
    max_intensity=600,
    locations_for_marker_layer=locations_sorted_df_by_cases,
    info_box_for_marker_layer=box_template_cases,
    locations_for_symbol_layer=locations_sorted_df_by_deaths,
    info_box_for_symbol_layer=box_template_deaths,
)
figure1

In [None]:
# Stroke heat map with the top-cases counties as markers and the top-deaths
# counties as symbols.
# The info boxes are the per-county rendered lists (box_template_*); the raw
# template strings passed before still contain unfilled {County}/{State} placeholders.
# NOTE(review): max_intensity=0.1 is far below the 600 used for the heart disease
# map -- confirm it suits the scale of the Stroke values.
figure2 = create_map(merged_df["Stroke"], 0.1, locations_sorted_df_by_cases, box_template_cases, locations_sorted_df_by_deaths, box_template_deaths)
figure2

# COVID Cases Per Day

In [None]:
# Reload the raw covid csv (the earlier `covid` frame dropped its date column) and preview five rows
covid_df = pd.read_csv('resources/covid-statistics.csv')
covid_df.head(5)

In [None]:
# Parse the date column into datetimes so it can be used as a time axis
covid_df["date"] = pd.to_datetime(covid_df["date"])

In [None]:
# The csv holds one row per county per date, so plotting the raw columns would
# join every county's rows into a single zig-zag line. Sum across counties for
# each date first; groupby also returns the dates in sorted order.
daily_totals = covid_df.groupby("date")[["cases", "deaths"]].sum()

date = daily_totals.index
covid_cases = daily_totals["cases"]
covid_deaths = daily_totals["deaths"]

fig, (ax1,ax2) = plt.subplots(2,1,figsize=(15, 10),sharex = True) #removed sharey = True

# Upper panel: cases summed over all counties
ax1.plot(date, covid_cases)
ax1.set_ylabel("Covid Cases", fontsize = 16)

# Lower panel: deaths summed over all counties
ax2.plot(date, covid_deaths);
ax2.set_ylabel("Covid Deaths", fontsize = 16)
ax2.set_xlabel("Date", fontsize = 16)

fig.suptitle('Covid Accross Time', fontsize=16)
fig.tight_layout(pad = 3) #separation between panels