## Bay area cases and deaths

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
                sep = ',')

In [4]:
counties = ['Alameda', 'Contra Costa', 'Marin', 'Monterey', 'Napa', 'San Francisco',
           'San Joaquin', 'San Mateo', 'Santa Clara', 'Santa Cruz', 'Solano', 'Sonoma']
populations = [1663000, 1147000, 260955, 437907, 140973, 884363, 745424, 
              770000, 1938000, 275897, 445458, 504217]

In [5]:
counties = pd.DataFrame({'county':counties, 'population':populations})

In [6]:
# filter the nyt dataset by the bay area counties
df = df[(np.isin(df['county'],counties)) & (df['state'] == 'California')]

In [7]:
df = pd.merge(df, counties, how = 'left', on = 'county')

In [14]:
df.sort_values(by = ['county', 'date'], inplace = True)

In [16]:
df[['cases_1000', 'deaths_1000']] = df[['cases', 'deaths']].div(df['population'].values, axis = 0)*1000


In [19]:
df[['new_cases', 'new_deaths']] = df.groupby(['county'])[['cases', 'deaths']].transform(lambda x: x.diff()) 
df[['roll_new_cases', 'roll_new_deaths']] = df.groupby('county')[['new_cases', 'new_deaths']].transform(lambda x: x.rolling(7, 1).mean())

In [20]:
df.to_csv('bay_area_cases_deaths_clean.csv')

## Santa Clara County cases, deaths, hospitalizations, and testing

In [21]:
#filter dataframe to Santa Clara only
scc = df[df['county'] == 'Santa Clara']

In [38]:
hosp  = pd.read_csv('https://data.sccgov.org/resource/5xkz-6esm.csv')

In [39]:
hosp = hosp[['date', 'icu_covid', 'covid_total_7davg', 'available_total', 'vents_pts', 'vents_available']]

In [40]:
hosp[['roll_icu_covid', 'roll_available_total']] = hosp[['icu_covid', 'available_total']].transform(lambda x: x.rolling(7, 1).mean())

In [46]:
hosp['date'] = pd.to_datetime(hosp.date)
scc['date'] = pd.to_datetime(scc.date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [47]:
scc = pd.merge(scc, hosp, on = 'date')

#### Get testing data

In [67]:
test = pd.read_csv('https://data.sccgov.org/resource/dvgc-tzgq.csv')

In [68]:
test.rename(columns = {'collection_date':'date'}, inplace = True)
test['date'] = pd.to_datetime(test.date)

In [69]:
test = test[['date', 'rate_pst_7d']]

In [70]:
scc = pd.merge(scc, test, on = 'date')

In [71]:
scc.to_csv('scc_cases_deaths_clean.csv')