In [1]:
# import dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
import time
from us import states
from scipy.stats import linregress
from matplotlib import pyplot as plt

# API Keys
from config import (census_key, gkey)
# Census API client authenticated with census_key; it is used below via c.acs5.get(...).
# NOTE(review): year=2017 presumably selects the 2017 data vintage for those queries — confirm
# against the census package documentation.
c = Census(census_key, year=2017)

ModuleNotFoundError: No module named 'config'

In [None]:
# Query the ACS 5-year endpoint for every zip code tabulation area (ZCTA).
# The survey year is whatever the `c` client was constructed with.
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E", "B17001_002E", "B02001_002E", "B02001_005E",
                          "B02001_003E", "B02001_004E", "B02001_007E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Replace the census variable codes with readable column names
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B02001_002E": "White Population",
                                      "B02001_004E": "Native American Population",
                                      "B02001_005E": "Asian Population",
                                      "B02001_003E": "African American Population",
                                      "B02001_007E": "Other Population",
                                      "NAME": "Name",
                                      "zip code tabulation area": "Zipcode"})

# Poverty Rate (%) = 100 * Poverty Count / Population.
# to_numeric(errors="coerce") turns missing/unparseable values into NaN instead of
# raising the way .astype(int) does, and masking non-positive populations avoids
# inf rates for areas with zero residents.
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
population = pd.to_numeric(census_pd["Population"], errors="coerce")
census_pd["Poverty Rate"] = 100 * poverty_count / population.where(population > 0)

# Final DataFrame: keep and order the analysis columns. The explicit .copy() makes
# this an independent frame so the column assignment below does not trigger
# SettingWithCopyWarning.
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "White Population", "Native American Population",
                       "Asian Population", "African American Population", "Other Population"]].copy()

# Cast the join key to int64 (presumably to match the integer zip codes in the
# COVID table it is merged with later — confirm).
census_pd["Zipcode"] = census_pd["Zipcode"].astype('int64')

# Visualize: must be the last expression in the cell for the preview to render
census_pd.head(10)


In [None]:
# Load the COVID case table and rename its POSTCODE column to "Zipcode",
# the key name used for the merges further down.
file_to_load = "covid_zip"
covid_df = pd.read_csv(file_to_load).rename(columns={"POSTCODE": "Zipcode"})


In [None]:
# Attach census demographics to each COVID row. The left join keeps every row of
# covid_df; zip codes with no census match get NaN in the census columns.
# The drop result has to be assigned back, otherwise "Unnamed: 0" (presumably an
# unlabeled index column carried in from the CSV) stays in merged_df.
merged_df = pd.merge(covid_df, census_pd, on="Zipcode", how="left").drop(columns=["Unnamed: 0"])

# Preview a few rows rather than dumping the whole frame
merged_df.head()

In [None]:
# Load the semicolon-delimited zip code coordinates file, discard the columns
# that are not needed, and rename "Zip" to "Zipcode" so it can serve as a merge key.
file_to_load = "us-zip-code-latitude-and-longitude.csv"
lat_lon_df = (
    pd.read_csv(file_to_load, delimiter=";")
    .drop(columns=["City", "State", "Timezone", "Daylight savings time flag", "geopoint"])
    .rename(columns={"Zip": "Zipcode"})
)

In [None]:
# Add the coordinate columns from lat_lon_df to every row (left join on Zipcode;
# zip codes without a match get NaN).
# merged_df is both input and output here, so re-running the cell would otherwise
# merge the coordinate columns a second time and produce *_x / *_y duplicates.
# Dropping any columns already contributed by lat_lon_df first keeps the cell
# idempotent; on a first run nothing is dropped.
already_merged = merged_df.columns.intersection(lat_lon_df.columns).difference(["Zipcode"])
merged_df = pd.merge(merged_df.drop(columns=already_merged), lat_lon_df, on="Zipcode", how="left")

# Preview a few rows rather than dumping the whole frame
merged_df.head()

In [None]:
gmaps.configure(api_key=gkey)

# The left merges leave NaN coordinates / counts for zip codes that had no match,
# and NaN is not a usable location or weight for the heatmap. Build the layer only
# from rows where latitude, longitude and case count are all present, so locations
# and weights stay the same length and aligned.
heat_df = merged_df[["Latitude", "Longitude", "ConfirmedCaseCount"]].astype(float).dropna()

# Store latitude / longitude pairs into locations
locations = heat_df[["Latitude", "Longitude"]]

# Heatmap weights: confirmed case count per row (a raw count, not a rate,
# despite the variable name, which is kept for compatibility with other cells)
infection_rate = heat_df["ConfirmedCaseCount"]

fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=infection_rate)

fig.add_layer(heat_layer)

fig

In [None]:
# Linear regression of confirmed case count on total population (one point per row of merged_df)
x_col, y_col = "Population", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on poverty count (one point per row of merged_df)
x_col, y_col = "Poverty Count", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on poverty rate (one point per row of merged_df)
x_col, y_col = "Poverty Rate", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on white population (one point per row of merged_df)
x_col, y_col = "White Population", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on Native American population (one point per row of merged_df)
x_col, y_col = "Native American Population", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on Asian population (one point per row of merged_df)
x_col, y_col = "Asian Population", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on African American population (one point per row of merged_df)
x_col, y_col = "African American Population", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

In [None]:
# Linear regression of confirmed case count on "Other" population (one point per row of merged_df)
x_col, y_col = "Other Population", "ConfirmedCaseCount"

# linregress yields NaN statistics if any input is NaN, and the left merges leave
# NaN for zip codes without a census match. Coerce both columns to numeric and
# keep only complete pairs so x and y stay aligned.
pairs = merged_df[[x_col, y_col]].apply(pd.to_numeric, errors="coerce").dropna()
x_values = pairs[x_col]
y_values = pairs[y_col]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
# Place the equation in axes-fraction coordinates so it is visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()