## Bay area cases and deaths

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
                sep = ',')

In [4]:
counties = ['Alameda', 'Contra Costa', 'Marin', 'Monterey', 'Napa', 'San Francisco',
           'San Joaquin', 'San Mateo', 'Santa Clara', 'Santa Cruz', 'Solano', 'Sonoma']
populations = [1663000, 1147000, 260955, 437907, 140973, 884363, 745424, 
              770000, 1938000, 275897, 445458, 504217]

In [5]:
counties = pd.DataFrame({'county':counties, 'population':populations})

In [6]:
# filter the nyt dataset by the bay area counties
df = df[(np.isin(df['county'],counties)) & (df['state'] == 'California')]

In [8]:
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population
0,2020-01-31,Santa Clara,California,6085.0,1,0.0,1938000
1,2020-02-01,Santa Clara,California,6085.0,1,0.0,1938000
2,2020-02-02,San Francisco,California,6075.0,2,0.0,884363
3,2020-02-02,Santa Clara,California,6085.0,2,0.0,1938000
4,2020-02-03,San Francisco,California,6075.0,2,0.0,884363


In [7]:
df = pd.merge(df, counties, how = 'left', on = 'county')

In [10]:
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population
0,2020-01-31,Santa Clara,California,6085.0,1,0.0,1938000
1,2020-02-01,Santa Clara,California,6085.0,1,0.0,1938000
2,2020-02-02,San Francisco,California,6075.0,2,0.0,884363
3,2020-02-02,Santa Clara,California,6085.0,2,0.0,1938000
4,2020-02-03,San Francisco,California,6075.0,2,0.0,884363


In [14]:
df.sort_values(by = ['county', 'date'], inplace = True)

In [15]:
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population
76,2020-03-01,Alameda,California,6001.0,1,0.0,1663000
83,2020-03-02,Alameda,California,6001.0,1,0.0,1663000
91,2020-03-03,Alameda,California,6001.0,2,0.0,1663000
100,2020-03-04,Alameda,California,6001.0,2,0.0,1663000
109,2020-03-05,Alameda,California,6001.0,2,0.0,1663000


In [16]:
df[['cases_1000', 'deaths_1000']] = df[['cases', 'deaths']].div(df['population'].values, axis = 0)*1000


In [17]:
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population,cases_1000,deaths_1000
76,2020-03-01,Alameda,California,6001.0,1,0.0,1663000,0.000601,0.0
83,2020-03-02,Alameda,California,6001.0,1,0.0,1663000,0.000601,0.0
91,2020-03-03,Alameda,California,6001.0,2,0.0,1663000,0.001203,0.0
100,2020-03-04,Alameda,California,6001.0,2,0.0,1663000,0.001203,0.0
109,2020-03-05,Alameda,California,6001.0,2,0.0,1663000,0.001203,0.0


In [19]:
df[['new_cases', 'new_deaths']] = df.groupby(['county'])[['cases', 'deaths']].transform(lambda x: x.diff()) 
df[['roll_new_cases', 'roll_new_deaths']] = df.groupby('county')[['new_cases', 'new_deaths']].transform(lambda x: x.rolling(7, 1).mean())

In [20]:
df.to_csv('bay_area_cases_deaths_clean.csv')

## Santa Clara County cases, deaths, hospitalizations, and testing

In [21]:
#filter dataframe to Santa Clara only
scc = df[df['county'] == 'Santa Clara']

In [22]:
hosp  = pd.read_csv('https://data.sccgov.org/resource/5xkz-6esm.csv')

In [23]:
hosp[['covid_total_7davg', 'available_total', '']]

Unnamed: 0,date,icu_covid,icu_pui,icu_other,icu_available,non_icu_covid,non_icu_pui,non_icu_other,non_icu_available,vents_pts,...,covid_pui_pct_7davg,other_total,available_total,icu_covid_pct_7davg,icu_covid_pui_pct_7davg,non_icu_covid_pct_7davg,non_icu_covid_pui_pct_7davg,cap_pct_7davg_total,icu_cap_pct_7davg,non_icu_cap_pct_7davg
0,2020-03-27T00:00:00.000,63,22,,66,62,52,,293,95,...,0.36,0,359,0.42,0.56,0.15,0.28,0.64,0.44,0.72
1,2020-03-28T00:00:00.000,66,30,,87,75,64,,403,90,...,0.34,0,490,0.39,0.54,0.14,0.27,0.66,0.46,0.73
2,2020-03-29T00:00:00.000,69,13,,91,81,49,,549,115,...,0.30,0,640,0.39,0.52,0.13,0.24,0.70,0.48,0.76
3,2020-03-30T00:00:00.000,72,34,,68,82,74,,579,110,...,0.30,0,647,0.40,0.54,0.13,0.23,0.70,0.46,0.77
4,2020-03-31T00:00:00.000,73,19,,73,76,68,,630,107,...,0.29,0,703,0.41,0.54,0.12,0.22,0.71,0.46,0.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,2020-12-04T00:00:00.000,83,0,204.0,47,230,13,1486.0,297,171,...,0.13,1690,344,0.24,0.24,0.10,0.11,0.17,0.16,0.17
253,2020-12-05T00:00:00.000,82,1,193.0,54,245,18,1425.0,333,170,...,0.13,1618,387,0.24,0.24,0.11,0.11,0.16,0.15,0.17
254,2020-12-06T00:00:00.000,86,0,193.0,50,271,20,1373.0,364,167,...,0.14,1566,414,0.25,0.25,0.11,0.12,0.17,0.15,0.17
255,2020-12-07T00:00:00.000,90,4,173.0,52,285,26,1364.0,364,168,...,0.15,1537,416,0.25,0.25,0.12,0.12,0.16,0.15,0.17


In [None]:
scc.to_csv('scc_cases_deaths_clean.csv')