In [1]:
# dependencies

import gmaps
import numpy as np
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt

In [2]:
ed_data = pd.read_csv("../resources/NEDI2018statedata.csv")
ed_data.head()

Unnamed: 0,Name,Abbreviation,Total # Eds,Total ED visit volume,Median visit volume,IQR visit volume,Unnamed: 6,<10k,10-19k,20-29k,30-39k,40-49k,>50k
0,Alaska,AK,20,261985,5979,2270,16180,13,3,0,3,0,1
1,Alabama,AL,98,2825756,22197,11000,42087,18,28,11,15,8,18
2,Arkansas,AR,79,1730571,12350,7000,32799,30,19,9,5,4,12
3,Arizona,AZ,88,3119364,29200,12775,50000,14,20,13,10,8,23
4,California,CA,341,15787403,41975,24000,67630,37,36,37,54,44,133


In [3]:
mortality_data = pd.read_csv("../resources/us-counties.csv")
mortality_data.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [4]:
unique_value = mortality_data["state"].nunique()
unique_value

55

In [5]:
ed_data["Name"].nunique()

51

In [6]:
# groupby mortality_data on state name with sum on deaths

grp_mortality = mortality_data.drop(columns=["fips", "date"])

grp_mortality = grp_mortality.groupby(["state"]).sum().reset_index(["state"])
grp_mortality = grp_mortality.rename(columns={"state": "Name"})

#add death rate column death/cases
grp_mortality["Death Rate"] = round((grp_mortality["deaths"].astype(int)/grp_mortality["cases"].astype(int))*100,2)
grp_mortality.head()


Unnamed: 0,Name,cases,deaths,Death Rate
0,Alabama,46078611,728745.0,1.58
1,Alaska,4544475,20735.0,0.46
2,Arizona,69132576,1418028.0,2.05
3,Arkansas,27248041,428643.0,1.57
4,California,271972863,4177868.0,1.54


In [9]:
# remove virgin islands, guam, n mariana islands
# retain Puerto Rico
# merge dataframes on state name to retain the states in the ed_data
# this eliminates Puerto Rico from the dataset

merged_ed_mort = pd.merge(ed_data, grp_mortality, how = "left", on = "Name").sort_values("Name")
merged_ed_mort

Unnamed: 0,Name,Abbreviation,Total # Eds,Total ED visit volume,Median visit volume,IQR visit volume,Unnamed: 6,<10k,10-19k,20-29k,30-39k,40-49k,>50k,cases,deaths,Death Rate
1,Alabama,AL,98,2825756,22197,11000,42087,18,28,11,15,8,18,46078611,728745.0,1.58
0,Alaska,AK,20,261985,5979,2270,16180,13,3,0,3,0,1,4544475,20735.0,0.46
3,Arizona,AZ,88,3119364,29200,12775,50000,14,20,13,10,8,23,69132576,1418028.0,2.05
2,Arkansas,AR,79,1730571,12350,7000,32799,30,19,9,5,4,12,27248041,428643.0,1.57
4,California,CA,341,15787403,41975,24000,67630,37,36,37,54,44,133,271972863,4177868.0,1.54
5,Colorado,CO,131,2515825,10220,4500,23725,64,29,10,9,4,15,35779317,721293.0,2.02
6,Connecticut,CT,35,1666267,39000,25000,61983,0,6,5,7,2,15,24625667,1338181.0,5.43
8,Delaware,DE,11,523043,43800,28369,62474,0,1,2,2,1,5,7447062,179045.0,2.4
7,District of Columbia,DC,10,591681,53749,36439,75000,0,1,0,2,1,6,4749053,174462.0,3.67
9,Florida,FL,266,11601824,37410,21900,60000,21,38,44,42,28,93,189474338,3429542.0,1.81
