In [3]:
import json
import pandas as pd
import os
import re
import requests
from datetime import date
import time
import math
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

with open('api_key.json') as api_file:
    data = json.load(api_file)
    
api_key = data['apiKey']
url_dict = {"county":"county","metro":"cbsa","state":"state"}
base_url = "https://api.covidactnow.org/v2"
raw_files = os.listdir("raw")

def all_capitalize(string, join_str=" ", split_str=" "):
    return join_str.join(list(map(lambda s: s.capitalize(), string.split(split_str))))

def get_state_row(state):
    abbrevs = pd.read_csv("abbrevs.csv")
    if len(state) == 2:
        return abbrevs[abbrevs["Abbrev"] == state.upper()]
    else:
        return abbrevs[abbrevs["State"] == all_capitalize(state)]
        
def short_state(state):
    state_row = get_state_row(state)
    if state_row.shape[0] == 0:
        print("No such state: " + state)
    else:
        return state_row["Abbrev"].values[0]

def long_state(state):
    state_row = get_state_row(state)
    if state_row.shape[0] == 0:
        print("No such state: " + state)
    else:
        return state_row["State"].values[0]

# County must be in ~all_capitalize(county)~ form.
# State must be in ~short_state(state)~ form.
def get_county_code(county, state):
    county_codes = pd.read_csv("county_codes.csv")
    return county_codes.loc[(county_codes["County Name"] == county) 
                            & (county_codes["State"] == state), 
                            "County Code"].values[0]

# County must be in ~all_capitalize(county)~ form.
# State must be in ~long_state(state)~ form.
def get_metro_code(county, state):
    metro_codes = pd.read_csv("metro_codes.csv")
    return metro_codes.loc[(metro_codes["County Name"]
                            .apply(lambda big: big.find(county) >= 0)) 
                           & (metro_codes["State Name"] == state), 
                           "Metro Code"].values[0]

def url_path(choice, code):
    if choice in url_dict:
        return "/" + url_dict[choice] + "/" + str(code) + ".timeseries.json"
    else:
        print("Not a valid data category: " + choice)

def raw_path(choice, code):
    if choice in url_dict:
        path = "raw/" + choice + "_" + str(code).lower() + "_" + date.today().isoformat() + ".json"
        reqs = choice + "_" + str(code)
        rep = re.compile(reqs)
        fm = list(filter(rep.search, raw_files))
        dm = list(filter(re.compile(date.today().isoformat()).search, fm))
        if len(dm) != 1:
            for f in fm:
                os.remove("raw/" + f)
            
            full_url = base_url + url_path(choice, code)
            r = requests.get(full_url, params=data)
            if r.status_code == 200:
                f = open(path, "w")
                f.write(r.text)
                f.close()
                return path
            else:
                print("Trouble with website: " + r.text)
                print("Status code: " + str(r.status_code))
                print("GET request url: " + full_url)
                
        else:
            del_match = [item for item in fm if item not in dm]
            for f in del_match:
                os.remove(f)
            
            return path
    
    else:
        print("Not a valid data category: " + choice)
        
def clean_path(old_path, date):
    with open(old_path) as raw_file:
        raw_json = json.load(raw_file)
    
    file_base = re.compile("[a-z0-9]+_[a-z0-9]+").search(old_path).group()
    new_path = "clean/" + file_base + ".csv"
    
    actuals = pd.DataFrame(raw_json["actualsTimeseries"])
    metrics = pd.DataFrame(raw_json["metricsTimeseries"])
    actuals = actuals.loc[actuals["date"].apply(lambda s: date.fromisoformat(s)) >= date,
                          ["date",
                           "newCases",
                           "vaccinationsInitiated",
                           "vaccinationsCompleted"]]
    metrics = metrics.loc[metrics["date"].apply(lambda s: date.fromisoformat(s)) >= date,
                          ["caseDensity",
                           "infectionRate",
                           "testPositivityRatio",
                           "icuCapacityRatio",
                           "vaccinationsInitiatedRatio",
                           "vaccinationsCompletedRatio"]].fillna(method="pad").fillna(method="bfill")
    clean = pd.concat([actuals, metrics], axis=1, join="inner")
    clean["tickCaseDensity"] = clean["caseDensity"].apply(lambda d: math.ceil(math.log2(d+0.01)))
    clean["tickInfectionRate"] = clean["infectionRate"].apply(lambda d: math.floor(d*20)-16)
    clean["tickTestPositivityRatio"] = clean["testPositivityRatio"].apply(lambda d: math.floor(d*100/3))
    clean["tickIcuCapacityRatio"] = clean["icuCapacityRatio"].apply(lambda d: max([0, math.floor((0 if np.isnan(d) else d)*20)-13]))
    clean["tickTotal"] = clean["tickCaseDensity"] + clean["tickInfectionRate"] + clean["tickTestPositivityRatio"] + clean["tickIcuCapacityRatio"]
    clean.to_csv(new_path)
    return new_path

def chart(county, min_date):
    county_arr = county_str.split(", ")
    county = all_capitalize(county_arr[0])
    raw_state = county_arr[1]
    ss = short_state(raw_state)
    ls = long_state(raw_state)
    county_code = get_county_code(county, ss)
    metro_code = get_metro_code(county, ls)
    
    print("Provided County Name: " + county)
    print("County Code: " + str(county_code))
    print("Metro Code: " + str(metro_code))
    print("State Abbreviation: " + ss)
    
    raw_county_path = raw_path("county", county_code)
    raw_metro_path = raw_path("metro", metro_code)
    raw_state_path = raw_path("state", ss)
    
    clean_county_path = clean_path(raw_county_path, min_date)
    clean_metro_path = clean_path(raw_metro_path, min_date)
    clean_state_path = clean_path(raw_state_path, min_date)
    
    county_df = pd.read_csv(clean_county_path)
    metro_df = pd.read_csv(clean_metro_path)
    state_df = pd.read_csv(clean_state_path)
    
    dates = state_df["date"] = pd.to_datetime(state_df["date"])
    county_ticks = county_df["tickTotal"]
    metro_ticks = metro_df["tickTotal"]
    state_ticks = state_df["tickTotal"]
    
    output_notebook()
    p = figure(title="Ticks for " + county + ", " + ss, x_axis_label="date", y_axis_label="ticks", x_axis_type='datetime')
    p.line(dates, county_ticks, legend_label="County", line_color="red")
    p.line(dates, metro_ticks, legend_label="Metro", line_color="blue")
    p.line(dates, state_ticks, legend_label="State", line_color="green")
    show(p)

county_str = input("What county do you want covid data for? (Format: <county>, <state>) ")
chart(county_str, date(2020,12,31))

What county do you want covid data for? (Format: <county>, <state>)  roanoke city, va


Provided County Name: Roanoke City
County Code: 51770
Metro Code: 40220
State Abbreviation: VA


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''