In [1]:
from texascovid19 import constants
import requests
import pandas as pd
from datetime import date

Fetch each day's data.  The data is "current as of 8 p.m. the day before reporting."

In [2]:
def get_arcgis_xhr_data(url):
    r = requests.get(url, verify=False)
    r.raise_for_status()
    data = r.json()['features']
    norm_data = []
    for datum in data:
        norm_data.append(datum['attributes'])
    return pd.DataFrame(data=norm_data)

In [3]:
print("loading city")
df_city = get_arcgis_xhr_data(constants.WILCO_CITY_XHR_URL)

print("loading gender")
df_gender = get_arcgis_xhr_data(constants.WILCO_GENDER_XHR_URL)

print("loading age")
df_age = get_arcgis_xhr_data(constants.WILCO_AGE_DISTRO_XHR_URL)

print("loading status")
df_status = get_arcgis_xhr_data(constants.WILCO_STATUS_XHR_URL)

loading city




loading gender
loading age




loading status


In [4]:
df_city = (df_city
           .rename(columns={"City_of_Residence": "City of Residence", "Number_of_Cases": "Number of Cases"})
           .drop('OBJECTID', axis=1)
           .set_index("City of Residence"))

In [5]:
df_city

Unnamed: 0_level_0,Number of Cases,Case_Percentage,Total_Cases,Recovered,OpenCases,Deceased
City of Residence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Austin,29,6.0,520,21,8,0
Cedar Park,38,7.0,520,27,11,0
Georgetown,67,13.0,520,36,29,2
Hutto,36,7.0,520,26,9,1
Leander,41,8.0,520,35,6,0
Other,55,11.0,520,36,19,0
Round Rock,254,49.0,520,111,126,17


In [6]:
df_gender = df_gender.rename(columns={
    "MalePercentage": "Male",
    "FemalePercentage": "Female"
}).T
df_gender.columns = ["Percentage"]
df_gender.index.name = "Gender"

In [7]:
df_gender

Unnamed: 0_level_0,Percentage
Gender,Unnamed: 1_level_1
Male,48.0
Female,52.0


In [8]:
df_age = df_age.rename(columns={
    "Q_0_to_17": "0 to 17",
    "Q_18_to_40": "18 to 40",
    "Q_41_to_60": "41 to 60",
    "Over_60": "Over 60"
}).T.convert_dtypes(convert_integer=True)
df_age.columns = ["Number of Cases"]
df_age.index.name = "Age Group"

In [9]:
df_age

Unnamed: 0_level_0,Number of Cases
Age Group,Unnamed: 1_level_1
0 to 17,18
18 to 40,161
41 to 60,194
Q_61_to_80,110
Over_80,35


In [10]:
df_status = df_status.rename(columns={
    "HospitalizedPercentage": "Hospitalized Percentage",
    "DeathsPercentage": "Deaths Percentage",
    "RecoverdPercentage": "Recovered Percentage",
    "Total_Cases": "Total Cases",
    "ICUPercentage": "ICU Percentage",
    "VentilatorPercentage": "Ventilator Percentage",
    "Travel_Acquired": "Travel Acquired",
    "TravelAcquiredPercentage": "Travel Acquired Percentage",
    "Locally_Acquired": "Locally Acquired",
    "LocallyAcquiredPercentage": "Locally Acquired Percentage"
}).T.drop('OBJECTID', axis=0)
df_status.columns = ["Number of Cases"]
df_status.index.name = "Status"

In [11]:
df_status

Unnamed: 0_level_0,Number of Cases
Status,Unnamed: 1_level_1
Current_Hospitalized,24.0
Current_Hospitalized_Percentag,5.0
Hospitalized,82.0
Hospitalized Percentage,16.0
Deaths,20.0
Deaths Percentage,4.0
Recovered,292.0
Recovered Percentage,56.0
ICU,41.0
ICU Percentage,50.0


In [12]:
today = date.today().isoformat()

In [13]:
wilco_path = f"{constants.DATA_PATH}/wilco"

In [14]:
def write_csv(categ, timeseries_column, dt, df):
    df = df.copy()
    df.to_csv(f"{wilco_path}/{categ}/{today}.csv", header=True, index=True)
    # append to timeseries
    timeseries_csv_name = f"{wilco_path}/{categ}/timeseries.csv"
    df.rename(columns={timeseries_column: today}, inplace=True)
    try:
        df_timeseries = pd.read_csv(timeseries_csv_name).set_index(df.index.name)
        if today in df_timeseries.columns:
            df_timeseries.drop(columns=today, inplace=True)
        df_timeseries = pd.concat([df_timeseries, df[[today]]] , axis=1).rename_axis(df.index.name)
        df_timeseries.to_csv(timeseries_csv_name, header=True, index=True)
    except FileNotFoundError as e:
        print(f"Couldn't find file {timeseries_csv_name}")
        df[[today]].to_csv(timeseries_csv_name, header=True, index=True)



In [15]:
write_csv("bycity", "Number of Cases", today, df_city)

In [16]:
write_csv("byage", "Number of Cases", today, df_age)

In [17]:
write_csv("bygender", "Percentage", today, df_gender)

In [18]:
write_csv("bystatus", "Number of Cases", today, df_status)