<h1>Import dependencies</h1>

In [1]:
import pandas as pd
import numpy as np

<h1>Extract and Load Data</h1>

In [2]:
file = 'data/ca_countycovid.csv'
df = pd.read_csv(file)
df.head()

Unnamed: 0,date,county,fips,confirmed_cases,deaths,new_confirmed_cases,new_deaths
0,1/8/21,Alameda,1,57824,729,804.0,16.0
1,1/8/21,Alpine,3,72,0,1.0,0.0
2,1/8/21,Amador,5,2801,26,40.0,0.0
3,1/8/21,Butte,7,8359,111,142.0,9.0
4,1/8/21,Calaveras,9,922,22,24.0,0.0


In [3]:
print(f'Start Date {df.date.max()}\nEnd Date {df.date.min()}')

Start Date 9/9/20
End Date 1/1/21


In [4]:
county_data = df[df['date'] =='1/8/21']
county_data = county_data.drop(columns=['fips', 'date', 'new_confirmed_cases', 'new_deaths'])
county_data['cfdr'] = (county_data['deaths'] / county_data['confirmed_cases'] *100).round(2)
county_data.head(3)

Unnamed: 0,county,confirmed_cases,deaths,cfdr
0,Alameda,57824,729,1.26
1,Alpine,72,0,0.0
2,Amador,2801,26,0.93


<h1>Import County FIPS Data for Mapping</h1>

In [5]:
fips_file = 'data/us_fips_codes.csv'
fields = ['fips', 'county_name', 'state_abbr', 'state_name']
iter_csv = pd.read_csv(fips_file, usecols=fields, iterator=True, chunksize=200)
fips_df = pd.concat([chunk[chunk['state_name'] == 'California'] for chunk in iter_csv])

<h1>Clean Data for merged</h1>

In [6]:
fips_df['county_name'] = fips_df['county_name'].replace(' County','', regex=True) # drop county
fips_df.reset_index(drop=True, inplace=True) 
fips_df.head(3)

Unnamed: 0,fips,county_name,state_abbr,state_name
0,6001,Alameda,CA,California
1,6003,Alpine,CA,California
2,6005,Amador,CA,California


<h1>Combine Dataframes by county name</h1>

In [8]:
ca_fips_df = county_data.merge(fips_df, left_on='county', right_on='county_name')
ca_fips_df = ca_fips_df.drop(columns=['county_name'])
ca_fips_df.head(3)

Unnamed: 0,county,confirmed_cases,deaths,cfdr,fips,state_abbr,state_name
0,Alameda,57824,729,1.26,6001,CA,California
1,Alpine,72,0,0.0,6003,CA,California
2,Amador,2801,26,0.93,6005,CA,California


<h1>Export Dataframe as csv file</h1>

In [9]:
ca_fips_df.to_csv('data/ca_c19_fips.csv')

In [10]:
ca_fips_df.head()

Unnamed: 0,county,confirmed_cases,deaths,cfdr,fips,state_abbr,state_name
0,Alameda,57824,729,1.26,6001,CA,California
1,Alpine,72,0,0.0,6003,CA,California
2,Amador,2801,26,0.93,6005,CA,California
3,Butte,8359,111,1.33,6007,CA,California
4,Calaveras,922,22,2.39,6009,CA,California
