# California COVID vaccinations scrape

By [Amy O'Kruk](https://twitter.com/amyokruk)

Downloads county-level and statewide data on vaccine doses administered, taken from a Tableau-powered dashboard published by the California Department of Public Health.

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
import re
import time
from time import gmtime, strftime 
import os
import pytz
from datetime import datetime

Scrape the dashboard page

In [2]:
# Embed URL of the Tableau Public vaccine dashboard that this notebook scrapes.
url = "https://public.tableau.com/interactive/views/COVID-19VaccineDashboardPublic/Vaccine?:embed=y&:showVizHome=n&:apiID=host0"

# A timeout keeps an unattended run from hanging indefinitely, and
# raise_for_status() stops here on an HTTP error instead of letting the
# cells below try to parse an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

In [3]:
# The session configuration is read as JSON from the <textarea id="tsConfigContainer"> element.
config_node = soup.find("textarea", {"id": "tsConfigContainer"})
if config_node is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError(
        "Could not find the 'tsConfigContainer' textarea in the dashboard page; "
        "the page layout may have changed or the request was blocked."
    )
tableauData = json.loads(config_node.text)

Get the link to the Tableau data

In [4]:
# Build the bootstrap-session endpoint from the root path and session id in the page config.
vizql_root = tableauData["vizql_root"]
session_id = tableauData["sessionid"]
dataUrl = f'https://public.tableau.com{vizql_root}/bootstrapSession/sessions/{session_id}'

In [5]:
# Request the bootstrap payload for the sheet named in the page config.
# The timeout and status check keep a stalled or failed request from being
# parsed as if it were data.
r = requests.post(dataUrl, data={"sheet_id": tableauData["sheetId"]}, timeout=60)
r.raise_for_status()

# The body is matched as two "<digits>;{...}" chunks back to back; the second
# JSON chunk is the one parsed below. The pattern is a raw string so that \d
# is passed to the regex engine instead of being treated as a (invalid)
# Python string escape.
dataReg = re.search(r'\d+;({.*})\d+;({.*})', r.text, re.MULTILINE)
if dataReg is None:
    raise RuntimeError(
        "Tableau bootstrap response did not match the expected "
        "'<length>;{json}<length>;{json}' layout."
    )
data1 = json.loads(dataReg.group(2))

# Drill down to the list of data columns in the first data segment.
dataJson = data1["secondaryInfo"]["presModelMap"]["dataDictionary"]["presModelHolder"]["genDataDictionaryPresModel"]["dataSegments"]["0"]["dataColumns"]

Isolate the county names and their dose counts

In [6]:
# Take the first 58 values of the third data column as the location names.
# NOTE(review): 58 presumably corresponds to the number of California counties,
# and the position of this column depends on the dashboard layout — confirm.
name_values = dataJson[2]['dataValues']
counties = name_values[:58]

In [7]:
# Take 58 values (positions 3 through 60) of the first data column as the dose counts.
# NOTE(review): the offset of 3 presumably skips summary figures that precede the
# per-county values; it depends on the dashboard layout — confirm.
numeric_values = dataJson[0]['dataValues']
doses = numeric_values[3:61]

Data formatting

In [8]:
# zip() stops at the shorter input, so a length mismatch would silently drop
# rows from the output; fail loudly instead.
if len(counties) != len(doses):
    raise ValueError(
        f"Expected one dose count per location, got {len(counties)} locations "
        f"and {len(doses)} dose counts."
    )

# Map each location name to its dose count.
zipped = dict(zip(counties, doses))

In [9]:
# Turn the name -> doses mapping into a two-column frame (the index becomes a column).
doses_by_location = pd.Series(zipped)
df = doses_by_location.reset_index()

In [10]:
# Give the two columns descriptive names.
column_names = ['location', 'doses']
df.columns = column_names

Grab the overall California total

In [11]:
# Row for the statewide figure, read from position 2 of the first data column.
# NOTE(review): this position depends on the dashboard layout — confirm it is still the statewide total.
statewide_doses = dataJson[0]['dataValues'][2]
add = {
    'location': 'California',
    'doses': statewide_doses,
}

In [12]:
# Add the statewide row to the frame. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0, so pd.concat is used instead.
df = pd.concat([df, pd.DataFrame([add])], ignore_index=True)

In [13]:
# Order the rows alphabetically by location name.
df = df.sort_values(by=['location'], ascending=True)

In [14]:
# Move the statewide row to the top and keep the remaining rows in their
# current order. pd.concat replaces DataFrame.append, which was removed in
# pandas 2.0.
is_statewide = df.location == 'California'
df = pd.concat([df[is_statewide], df[~is_statewide]]).reset_index(drop=True)

In [15]:
# Timezone used to decide which calendar date the output file is stamped with.
# NOTE(review): this is US Eastern time although the data is for California;
# a late-evening Pacific run would be stamped with the next day's date — confirm this is intended.
timezone_name = "America/New_York"
tz = pytz.timezone(timezone_name)

In [16]:
# Current calendar date in the configured timezone; used as the output file name.
current_time = datetime.now(tz)
today = current_time.date()

In [17]:
# Output folder: a "data" directory under the current working directory.
working_dir = os.path.abspath("")
data_dir = os.path.join(working_dir, "data")

In [18]:
# Make sure the output folder exists so a fresh checkout does not fail on write.
os.makedirs(data_dir, exist_ok=True)

# Write one CSV per run, named after the run date, without the row index.
df.to_csv(os.path.join(data_dir, f"{today}.csv"), index=False)