In [1]:
from texascovid19 import constants
import requests
import pandas as pd
from datetime import date

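Fetch each day's data. The data is "current as of 8 p.m. the day before reporting," so a snapshot saved under today's date reflects the counts as of the previous evening (assuming the notebook is run on the reporting day).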

In [2]:
def get_arcgis_xhr_data(url, timeout=30, verify=False):
    """Download a feature query response and flatten it into a DataFrame.

    The response body must be JSON containing a ``features`` list in which
    every element has an ``attributes`` mapping; each mapping becomes one row.

    Parameters
    ----------
    url : str
        Endpoint to GET.
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without a timeout a stalled server would
        block the notebook indefinitely.
    verify : bool or str, optional
        Passed to ``requests.get``. Defaults to ``False`` to preserve the
        notebook's existing behavior.

    Returns
    -------
    pandas.DataFrame
        One row per feature, one column per attribute key.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the payload has no ``features`` key or a feature has no
        ``attributes`` key.
    """
    # SECURITY NOTE(review): verify=False disables TLS certificate checking,
    # so the response is not authenticated. Pass verify=True once the
    # endpoint's certificate chain is confirmed to validate.
    r = requests.get(url, verify=verify, timeout=timeout)
    r.raise_for_status()
    features = r.json()['features']
    return pd.DataFrame(data=[feature['attributes'] for feature in features])

In [3]:
# Download the four WILCO feeds, one DataFrame per feed. `map` is consumed
# left to right during unpacking, so the requests are issued in the order
# city -> gender -> age -> status.
wilco_feed_urls = (
    constants.WILCO_CITY_XHR_URL,
    constants.WILCO_GENDER_XHR_URL,
    constants.WILCO_AGE_DISTRO_XHR_URL,
    constants.WILCO_STATUS_XHR_URL,
)
df_city, df_gender, df_age, df_status = map(get_arcgis_xhr_data, wilco_feed_urls)



In [4]:
# Swap the feed's raw field names for display labels, then key the table by city.
city_labels = {
    "City_of_Residence": "City of Residence",
    "value": "Number of Cases",
}
df_city = df_city.rename(columns=city_labels).set_index("City of Residence")

In [5]:
# Display the relabeled city table, indexed by "City of Residence".
df_city

Unnamed: 0_level_0,Number of Cases
City of Residence,Unnamed: 1_level_1
Austin,16
Cedar Park,22
Georgetown,32
Hutto,15
Leander,26
Other,19
Round Rock,81


In [6]:
# Relabel the gender fields, flip the single-row frame so each gender becomes
# a row, and name the resulting column and index.
gender_labels = {
    "MalePercentage": "Male",
    "FemalePercentage": "Female",
}
df_gender = (
    df_gender
    .rename(columns=gender_labels)
    .transpose()
    .set_axis(["Percentage"], axis="columns")  # fails unless exactly one column
    .rename_axis("Gender")
)

In [7]:
# Display the transposed gender table (one row per gender, "Percentage" column).
df_gender

Unnamed: 0_level_0,Percentage
Gender,Unnamed: 1_level_1
Male,51.0
Female,49.0


In [8]:
# Relabel the age buckets, flip the single-row frame so each bucket becomes a
# row, let pandas pick nullable dtypes, then name the column and index.
age_labels = {
    "Q_0_to_17": "0 to 17",
    "Q_18_to_40": "18 to 40",
    "Q_41_to_60": "41 to 60",
    "Over_60": "Over 60",
}
df_age = (
    df_age
    .rename(columns=age_labels)
    .transpose()
    .convert_dtypes(convert_integer=True)
    .set_axis(["Number of Cases"], axis="columns")  # fails unless exactly one column
    .rename_axis("Age Group")
)

In [9]:
# Display the transposed age table (one row per age group, "Number of Cases" column).
df_age

Unnamed: 0_level_0,Number of Cases
Age Group,Unnamed: 1_level_1
0 to 17,8
18 to 40,66
41 to 60,88
Over 60,49


In [10]:
# Relabel the status fields, flip the single-row frame so each field becomes a
# row, discard the OBJECTID row, then name the column and index.
# NOTE(review): "RecoverdPercentage" presumably mirrors the upstream field's
# spelling — verify against the feed before "correcting" it.
# NOTE(review): the single column is labeled "Number of Cases" even though
# several rows are percentages; write_csv() is called with that label, so
# renaming it requires updating that call too.
status_labels = {
    "HospitalizedPercentage": "Hospitalized Percentage",
    "DeathsPercentage": "Deaths Percentage",
    "RecoverdPercentage": "Recovered Percentage",
    "Total_Cases": "Total Cases",
    "ICUPercentage": "ICU Percentage",
    "VentilatorPercentage": "Ventilator Percentage",
    "Travel_Acquired": "Travel Acquired",
    "TravelAcquiredPercentage": "Travel Acquired Percentage",
    "Locally_Acquired": "Locally Acquired",
    "LocallyAcquiredPercentage": "Locally Acquired Percentage",
}
df_status = (
    df_status
    .rename(columns=status_labels)
    .transpose()
    .drop(index='OBJECTID')
    .set_axis(["Number of Cases"], axis="columns")  # fails unless exactly one column
    .rename_axis("Status")
)

In [11]:
# Display the transposed status table (one row per status field).
df_status

Unnamed: 0_level_0,Number of Cases
Status,Unnamed: 1_level_1
Hospitalized,34.0
Hospitalized Percentage,16.0
Deaths,6.0
Deaths Percentage,3.0
Recovered,127.0
Recovered Percentage,60.0
ICU,21.0
ICU Percentage,62.0
Ventilator,15.0
Ventilator Percentage,44.0


In [12]:
# Run date as an ISO-8601 string (YYYY-MM-DD); used to name the daily snapshot
# CSV and the column added to each timeseries.
run_date = date.today()
today = run_date.isoformat()

In [13]:
# Root directory for this notebook's CSV output; write_csv() writes to
# "<wilco_path>/<categ>/<date>.csv" and "<wilco_path>/<categ>/timeseries.csv".
wilco_path = f"{constants.DATA_PATH}/wilco"

In [14]:
def write_csv(categ, timeseries_column, dt, df):
    """Save a daily snapshot of ``df`` and add it to the category's timeseries.

    Two files are written under ``{wilco_path}/{categ}/`` (``wilco_path`` is a
    notebook-level global):

    * ``{dt}.csv`` — ``df`` exactly as passed in.
    * ``timeseries.csv`` — one column per date. The ``timeseries_column`` of
      ``df`` is added under the label ``dt``; an existing column with that
      label is replaced, so re-running on the same date does not duplicate it.
      If the file does not exist yet, it is created from this single column.

    Parameters
    ----------
    categ : str
        Sub-directory name under ``wilco_path`` (must already exist).
    timeseries_column : str
        Column of ``df`` to record in the timeseries.
    dt : str or date-like
        Date label for the snapshot file name and the timeseries column.
        Converted with ``str()``, so an ISO string is used as-is.
    df : pandas.DataFrame
        Frame with a named index; the index name must match the index column
        stored in ``timeseries.csv``. ``df`` is not modified.
    """
    # Use the caller's date rather than the notebook-global `today`, so the
    # function also works for backfills and does not depend on hidden state.
    day = str(dt)
    categ_dir = f"{wilco_path}/{categ}"
    df.to_csv(f"{categ_dir}/{day}.csv", header=True, index=True)

    # Build today's timeseries column without mutating the caller's frame.
    timeseries_csv_name = f"{categ_dir}/timeseries.csv"
    todays_column = df[[timeseries_column]].rename(columns={timeseries_column: day})

    # Only the read is guarded: a missing timeseries file just means first run.
    try:
        df_timeseries = pd.read_csv(timeseries_csv_name).set_index(df.index.name)
    except FileNotFoundError:
        print(f"Couldn't find file {timeseries_csv_name}")
        todays_column.to_csv(timeseries_csv_name, header=True, index=True)
        return

    # Drop any column already recorded for this date so a re-run replaces it.
    df_timeseries = df_timeseries.drop(columns=day, errors="ignore")
    df_timeseries = (
        pd.concat([df_timeseries, todays_column], axis=1)
        .rename_axis(df.index.name)
    )
    df_timeseries.to_csv(timeseries_csv_name, header=True, index=True)



In [15]:
# Persist today's per-city case counts (snapshot + timeseries).
write_csv(categ="bycity", timeseries_column="Number of Cases", dt=today, df=df_city)

In [16]:
# Persist today's per-age-group case counts (snapshot + timeseries).
write_csv(categ="byage", timeseries_column="Number of Cases", dt=today, df=df_age)

In [17]:
# Persist today's gender percentages (snapshot + timeseries).
write_csv(categ="bygender", timeseries_column="Percentage", dt=today, df=df_gender)

In [18]:
# Persist today's status figures (snapshot + timeseries).
write_csv(categ="bystatus", timeseries_column="Number of Cases", dt=today, df=df_status)