## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [22]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [23]:
import os
import pathlib

In [24]:
# Absolute path of the current working directory (os.path.abspath("") resolves to it).
this_dir = pathlib.Path(os.path.abspath(""))

In [25]:
# "data" folder beneath this_dir; the export cells write their CSVs under it.
data_dir = this_dir / "data"

In [26]:
import requests
from bs4 import BeautifulSoup

import json
import re

import pandas as pd
import numpy as np

import pytz
import glob
from datetime import datetime

## Download

Paths to Dashboard

In [27]:
# Tableau Public host and view path; the download cell joins them into the request URL.
host_url = "https://public.tableau.com"
path = "/views/TITLEDStorylinewithallinone/Storyline2"

Retrieve dashboard data

In [28]:
# Fetch the embedded view. Its HTML carries the session configuration as JSON
# inside the <textarea id="tsConfigContainer"> element.
url = f"{host_url}{path}"

r = requests.get(url, params={":embed": "y", ":showVizHome": "no"})
# Stop here on an HTTP error instead of trying to parse an error page.
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

tableauData = json.loads(soup.find("textarea", {"id": "tsConfigContainer"}).text)

# Bootstrap the vizql session named in that configuration.
dataUrl = f'{host_url}{tableauData["vizql_root"]}/bootstrapSession/sessions/{tableauData["sessionid"]}'

r = requests.post(
    dataUrl,
    data={
        "sheet_id": tableauData["sheetId"],
    },
)
r.raise_for_status()

# The response body holds two JSON documents, each preceded by "<digits>;".
# A raw string keeps \d a regex escape rather than an invalid string escape.
dataReg = re.search(r"\d+;({.*})\d+;({.*})", r.text, re.MULTILINE)
if dataReg is None:
    raise ValueError(
        "bootstrapSession response did not contain the two expected JSON payloads"
    )
info = json.loads(dataReg.group(1))
data = json.loads(dataReg.group(2))

## Parse

Extract the cities data

In [29]:
# Column metadata of the worksheet in zone "33" of story point "1".
columnsData = info["worldUpdate"]["applicationPresModel"]["workbookPresModel"][
    "dashboardPresModel"
]["zones"]["5"]["presModelHolder"]["flipboard"]["storyPoints"]["1"][
    "dashboardPresModel"
][
    "zones"
][
    "33"
][
    "presModelHolder"
][
    "visual"
][
    "vizData"
][
    "paneColumnsData"
]
# One record per captioned column. The first pane index and first column index
# locate the column's value/alias index lists inside paneColumnsList.
# Columns without a fieldCaption are skipped.
result = [
    {
        "fieldCaption": t.get("fieldCaption", ""),
        "valueIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["valueIndices"],
        "aliasIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["aliasIndices"],
        "dataType": t.get("dataType"),
        "paneIndices": t["paneIndices"][0],
        "columnIndices": t["columnIndices"][0],
    }
    for t in columnsData["vizDataColumns"]
    if t.get("fieldCaption")
]
# Data dictionary: one entry per data type, each holding a "dataValues" list.
dataFull = data["secondaryInfo"]["presModelMap"]["dataDictionary"]["presModelHolder"][
    "genDataDictionaryPresModel"
]["dataSegments"]["0"]["dataColumns"]


def onAlias(it, value, cstring):
    """Resolve one alias index.

    A non-negative index reads directly from ``value``. A negative index
    reads from the cstring column's ``dataValues`` at position ``abs(it) - 1``.
    """
    return value[it] if (it >= 0) else cstring["dataValues"][abs(it) - 1]


frameData = {}
cstring = [t for t in dataFull if t["dataType"] == "cstring"][0]
for t in dataFull:
    for index in result:
        if t["dataType"] == index["dataType"]:
            if len(index["valueIndices"]) > 0:
                frameData[f'{index["fieldCaption"]}-value'] = [
                    t["dataValues"][abs(it)] for it in index["valueIndices"]
                ]
            if len(index["aliasIndices"]) > 0:
                filterOut = [
                    onAlias(it, t["dataValues"], cstring)
                    for it in index["aliasIndices"]
                ]
                try:
                    # Numeric aliases are kept only when the first one exceeds 1.
                    if filterOut[0] > 1:
                        frameData[f'{index["fieldCaption"]}-alias'] = filterOut
                except TypeError:
                    # A non-numeric first alias (e.g. a string) cannot be compared
                    # with 1; such columns are always kept.
                    frameData[f'{index["fieldCaption"]}-alias'] = filterOut

Get Correctional facilities data

In [30]:
# Column metadata of the worksheet in zone "8" of story point "1".
columnsData = info["worldUpdate"]["applicationPresModel"]["workbookPresModel"][
    "dashboardPresModel"
]["zones"]["5"]["presModelHolder"]["flipboard"]["storyPoints"]["1"][
    "dashboardPresModel"
][
    "zones"
][
    "8"
][
    "presModelHolder"
][
    "visual"
][
    "vizData"
][
    "paneColumnsData"
]
# One record per captioned column. The first pane index and first column index
# locate the column's value/alias index lists inside paneColumnsList.
# Columns without a fieldCaption are skipped.
result = [
    {
        "fieldCaption": t.get("fieldCaption", ""),
        "valueIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["valueIndices"],
        "aliasIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["aliasIndices"],
        "dataType": t.get("dataType"),
        "paneIndices": t["paneIndices"][0],
        "columnIndices": t["columnIndices"][0],
    }
    for t in columnsData["vizDataColumns"]
    if t.get("fieldCaption")
]
# Data dictionary: one entry per data type, each holding a "dataValues" list.
dataFull = data["secondaryInfo"]["presModelMap"]["dataDictionary"]["presModelHolder"][
    "genDataDictionaryPresModel"
]["dataSegments"]["0"]["dataColumns"]


def onAlias(it, value, cstring):
    """Resolve one alias index.

    A non-negative index reads directly from ``value``. A negative index
    reads from the cstring column's ``dataValues`` at position ``abs(it) - 1``.
    """
    return value[it] if (it >= 0) else cstring["dataValues"][abs(it) - 1]


frameData_2 = {}
cstring = [t for t in dataFull if t["dataType"] == "cstring"][0]
for t in dataFull:
    for index in result:
        if t["dataType"] == index["dataType"]:
            if len(index["valueIndices"]) > 0:
                frameData_2[f'{index["fieldCaption"]}-value'] = [
                    t["dataValues"][abs(it)] for it in index["valueIndices"]
                ]
            if len(index["aliasIndices"]) > 0:
                filterOut = [
                    onAlias(it, t["dataValues"], cstring)
                    for it in index["aliasIndices"]
                ]
                try:
                    # Numeric aliases are kept only when the first one exceeds 1.
                    if filterOut[0] > 1:
                        frameData_2[f'{index["fieldCaption"]}-alias'] = filterOut
                except TypeError:
                    # A non-numeric first alias (e.g. a string) cannot be compared
                    # with 1; such columns are always kept.
                    frameData_2[f'{index["fieldCaption"]}-alias'] = filterOut

Turn both into DataFrames

In [31]:
# Each dict key is loaded as a row (orient="index"), gaps are filled with 0,
# and the frame is then transposed so every key ends up as a column.
df = pd.DataFrame.from_dict(frameData, orient="index").fillna(0).transpose()
df_2 = pd.DataFrame.from_dict(frameData_2, orient="index").fillna(0).transpose()

Just get correctional facilities cases from second dataframe

In [32]:
# Display the second frame; the following cells filter it by these columns.
df_2

Unnamed: 0,Measure Values-alias,Kings County-value,Kings County-alias,Measure Names-value,Measure Names-alias
0,598,Daily Increase,Daily Increase,[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Tests
1,499940,Total,Total,[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Tests
2,158646,State Correctional Facility (Inmate),State Correctional Facility (Inmate),[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Tests
3,341294,Community (Non-inmate),Community (Non-inmate),[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Tests
4,5,Daily Increase,Daily Increase,[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Deaths
5,303,Total,Total,[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Deaths
6,19,State Correctional Facility (Inmate),State Correctional Facility (Inmate),[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Deaths
7,284,Community (Non-inmate),Community (Non-inmate),[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Deaths
8,35,Daily Increase,Daily Increase,[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Cases
9,29843,Total,Total,[federated.0r1lg6o1tllpw911zqzhi0opa5j0].[sum:...,Cases


In [33]:
# Keep only the rows whose measure is "Cases".
df_2 = df_2.loc[df_2["Measure Names-alias"].eq("Cases")]

In [34]:
# Keep only the state correctional facility (inmate) row.
df_2 = df_2.loc[df_2["Kings County-alias"].eq("State Correctional Facility (Inmate)")]

In [35]:
# Narrow the frame to the label column and the count column.
df_2 = df_2.loc[:, ["Kings County-alias", "Measure Values-alias"]]

In [36]:
# The label column selected above is "Kings County-alias". rename() silently
# ignores keys that are not present, so the mapping must use that exact label;
# otherwise "area" is never created and is NaN for this row after the concat.
df_2 = df_2.rename(
    columns={"Kings County-alias": "area", "Measure Values-alias": "confirmed_cases"}
)

Rename city dataframe columns

In [37]:
# Give the city frame the same column names as the facilities frame.
df = df.rename(
    {"City-alias": "area", "SUM(Number of Cases)-alias": "confirmed_cases"},
    axis="columns",
)

Reorder columns, Combine and add date, Kings County, and fix text

In [38]:
# Stack the city rows and the correctional-facility row into one frame.
df_final = pd.concat([df, df_2])

In [39]:
# Tag every row with the county, keep the three output columns in order, and
# swap the dashboard's facility label for the shorter one.
df_final = (
    df_final.assign(county="Kings")
    .loc[:, ["county", "area", "confirmed_cases"]]
    .replace("State Correctional Facility (Inmate)", "Correctional facilities")
)

Remove any commas from cases column

In [40]:
# Regex replacement strips every "," from string values in the cases column.
df_final["confirmed_cases"] = df_final["confirmed_cases"].replace(
    to_replace=",", value="", regex=True
)

Dig up updated time

In [41]:
# The "name" of zone "43" in story point "1" carries the "last updated" sentence
# that the next cells parse.
date_sentence = (
    info["worldUpdate"]["applicationPresModel"]["workbookPresModel"]
    ["dashboardPresModel"]["zones"]["5"]["presModelHolder"]["flipboard"]
    ["storyPoints"]["1"]["dashboardPresModel"]["zones"]["43"]
    ["zoneCommon"]["name"]
)

In [42]:
# Drop the fixed lead-in text, then split on " at " into separate pieces.
date_strings = date_sentence.replace("Overview Last Updated on ", "").split(" at ")

In [43]:
# Keep only the piece before " at ".
date = date_strings[0]

In [44]:
# Parse the date text once and broadcast the same timestamp to every row.
df_final["county_date"] = pd.to_datetime(date)

## Vet

In [45]:
# Expected number of rows in df_final; the two checks below compare against it.
default_kings_len = 9

In [46]:
# Fail when the scrape returns more rows than expected. The explicit raise
# (rather than `assert` wrapped in try/except) still runs under `python -O`
# and produces a single, unchained traceback.
if len(df_final) > default_kings_len:
    raise AssertionError("Kings County has more rows than before")

In [47]:
# Fail when the scrape returns fewer rows than expected. The explicit raise
# (rather than `assert` wrapped in try/except) still runs under `python -O`
# and produces a single, unchained traceback.
if len(df_final) < default_kings_len:
    raise AssertionError("Kings County has missing row(s)")

## Export

Set the date

In [48]:
# Pacific time zone, used below to compute today's date.
tz = pytz.timezone("America/Los_Angeles")

In [49]:
# Current calendar date in that zone; it becomes the snapshot's file name.
today = datetime.now(tz).date()

In [50]:
# Sub-folder of data_dir that holds this county's files.
slug = "kings"

In [51]:
# Write today's snapshot to <data_dir>/<slug>/<today>.csv without the index column.
df_final.to_csv(data_dir / slug / f"{today}.csv", index=False)

## Combine

In [52]:
# Every CSV in this county's folder except the combined timeseries output.
csv_list = list(
    filter(
        lambda csv_path: not csv_path.endswith("timeseries.csv"),
        glob.glob(str(data_dir / slug / "*.csv")),
    )
)

In [53]:
# Load every snapshot. Files with "manual" in their name already carry a "date"
# column; all others get their date from the file name.
df_list = []
for csv in csv_list:
    # Use the file name only: os.path.basename handles the platform's path
    # separator, and a parent directory containing "manual" can no longer
    # misclassify every file.
    file_name = os.path.basename(csv)
    if "manual" in file_name:
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        file_date = file_name.replace(".csv", "")
        df = pd.read_csv(csv, parse_dates=["county_date"])
        # NOTE(review): this stays a plain string while manual files parse
        # "date" as datetimes — confirm the mixed column sorts as intended.
        df["date"] = file_date
    df_list.append(df)

In [54]:
# Merge every snapshot and order the rows by date, then by area.
df = pd.concat(df_list).sort_values(["date", "area"])

In [55]:
# Save the combined history as timeseries.csv in the same folder, without the index.
df.to_csv(data_dir / slug / "timeseries.csv", index=False)