# California COVID vaccinations scrape

By [Amy O'Kruk](https://twitter.com/amyokruk)

Downloads county-level and statewide data on vaccine doses administered from a Tableau-powered dashboard published by the California Department of Public Health.

In [41]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
import re
import time
from time import gmtime, strftime 
import os

In [None]:
# NOTE(review): nothing else in the visible notebook reads `msgs`; looks like a
# leftover scratch cell — confirm before removing.
msgs = [
    'hello, world!',
]

Scrape the dashboard page

In [42]:
# URL of the dashboard view on Tableau Public (requested with the embed flags set).
url = "https://public.tableau.com/interactive/views/COVID-19VaccineDashboardPublic/Vaccine?:embed=y&:showVizHome=n&:apiID=host0"

# The timeout keeps the scrape from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed as if it were the dashboard.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

In [43]:
# The session configuration is read as JSON from <textarea id="tsConfigContainer">.
config_node = soup.find("textarea", {"id": "tsConfigContainer"})
if config_node is None:
    # Without this guard a changed or blocked page fails with an opaque
    # "'NoneType' object has no attribute 'text'" error.
    raise ValueError('No <textarea id="tsConfigContainer"> found in the dashboard page')
tableauData = json.loads(config_node.text)

Get the link to the Tableau data

In [44]:
# Build the bootstrapSession endpoint from the two values found in the page config.
vizql_root = tableauData["vizql_root"]
session_id = tableauData["sessionid"]
dataUrl = f'https://public.tableau.com{vizql_root}/bootstrapSession/sessions/{session_id}'

In [45]:
# POST the sheet id to the bootstrapSession endpoint to obtain the dashboard data.
# The timeout avoids hanging forever; raise_for_status() fails fast on HTTP errors.
r = requests.post(
    dataUrl,
    data={"sheet_id": tableauData["sheetId"]},
    timeout=30,
)
r.raise_for_status()

# The pattern matches two JSON objects, each preceded by "<digits>;".
# It is written as a raw string because "\d" in a regular string literal is an
# invalid escape sequence (a warning on current Python versions).
dataReg = re.search(r'\d+;({.*})\d+;({.*})', r.text, re.MULTILINE)
if dataReg is None:
    # Make a changed response format obvious instead of failing on None.group().
    raise ValueError("Unexpected bootstrapSession response: could not locate the two JSON payloads")

# Only the second JSON object is used below.
data1 = json.loads(dataReg.group(2))

# Drill down to the list of data columns in the first data segment.
dataJson = data1["secondaryInfo"]["presModelMap"]["dataDictionary"]["presModelHolder"]["genDataDictionaryPresModel"]["dataSegments"]["0"]["dataColumns"]

Isolate the county names and dose counts

In [46]:
# Location names: the first 58 values of the data column at index 2.
# NOTE(review): 58 presumably corresponds to California's county count — confirm
# against the dashboard, since the slice is positional.
county_values = dataJson[2]['dataValues']
counties = county_values[:58]

In [47]:
# Dose counts: 58 values (positions 3 through 60) of the data column at index 0.
# NOTE(review): assumes these line up one-to-one with `counties` — verify, the
# offsets are positional and will shift if the dashboard layout changes.
dose_values = dataJson[0]['dataValues']
doses = dose_values[3:61]

Data formatting

In [48]:
# Pair each location name with the dose value at the same position.
zipped = {county: dose for county, dose in zip(counties, doses)}

In [49]:
# Convert the mapping to a Series, then move its keys out of the index so the
# result is a two-column DataFrame.
dose_series = pd.Series(zipped)
df = dose_series.reset_index()

In [50]:
# Give the two columns descriptive names.
column_names = ['location', 'doses']
df.columns = column_names

Grab the overall California total

In [51]:
# Row for the statewide figure, read from position 2 of the data column at index 0.
# NOTE(review): assumes that position holds the California total — confirm.
add = dict(location='California', doses=dataJson[0]['dataValues'][2])

In [52]:
# Add the statewide row to the county rows.
# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# is the supported equivalent and also works on older pandas versions.
df = pd.concat([df, pd.DataFrame([add])], ignore_index=True)

In [53]:
# Order the rows alphabetically by location name.
df = df.sort_values('location')

In [54]:
# Put the 'California' row first, followed by every other row in its existing
# order, then renumber the index.
# DataFrame.append was removed in pandas 2.0, so the two slices are joined with
# pd.concat instead.
df = pd.concat(
    [df[df.location == 'California'], df[df.location != 'California']]
).reset_index(drop=True)

In [37]:
# Local date as day-month-year; it becomes the name of the output file.
# With no time tuple given, time.strftime formats the current local time.
today = time.strftime("%d-%m-%Y")

In [38]:
# Absolute path of the "data" folder inside the current working directory.
data_dir = os.path.abspath("data")

In [39]:
# Create the output folder if needed: to_csv does not create missing
# directories, so a fresh checkout would otherwise fail on this cell.
os.makedirs(data_dir, exist_ok=True)

# Write today's snapshot as <data_dir>/<dd-mm-YYYY>.csv without the index column.
df.to_csv(os.path.join(data_dir, f"{today}.csv"), index=False)