In [11]:
import json
import pandas as pd
import os
import re
import requests
from datetime import date
from datetime import timezone
from datetime import time
from datetime import timedelta
from datetime import datetime
import math
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numbers

# Load the API credentials from a local JSON file. The parsed dict is kept in
# `data`, which raw_get() passes as the query parameters of its GET request.
with open('api_key.json') as api_file:
    data = json.load(api_file)
    
# Value stored under "apiKey"; not referenced again in the visible code.
api_key = data['apiKey']
# Maps the category names used throughout this notebook to the path segment
# substituted for {cat} in `url_f`.
url_dict = {"county":"county","metro":"cbsa","state":"state"}
base_url = "https://api.covidactnow.org/v2"
# Path template appended to `base_url`; filled in by url_path().
url_f = "/{cat}/{code}.timeseries.json"
# File names found in raw/ at the moment this cell runs (a one-time snapshot).
raw_files = os.listdir("raw")

def all_capitalize(string, join_str=" ", split_str=" "):
    """Capitalize every piece of ``string`` and glue the pieces back together.

    ``string`` is split with ``re.split`` using ``split_str`` as the regular
    expression. Each piece goes through ``str.capitalize`` (first character
    upper-cased, the rest lower-cased) and the pieces are joined with
    ``join_str``.
    """
    pieces = re.split(split_str, string)
    return join_str.join([piece.capitalize() for piece in pieces])

class County:
    """A county resolved against the local lookup CSV files.

    An instance is built from a "<county>, <state>" string. The state may be
    given as a two-letter abbreviation or as a full name.

    Attributes set by ``__init__``:
        name: county name after ``all_capitalize`` (split on space/hyphen).
        raw_state: the state part exactly as typed (whitespace stripped).
        short_state: value of the "Abbrev" column of ``abbrevs.csv``, or
            None when the state is not found.
        long_state: value of the "State" column of ``abbrevs.csv``, or None
            when the state is not found.
        code: "County Code" from ``county_codes.csv``.
        metro_code: "Metro Code" from ``metro_codes.csv``, or None when no
            row matches.
    """

    # Lookup tables, resolved relative to the working directory.
    a_path = "abbrevs.csv"
    c_path = "county_codes.csv"
    m_path = "metro_codes.csv"
    
    # Maps the `choice` argument of get_state() to a column of abbrevs.csv.
    a_head = {"short": "Abbrev", "long": "State"}
    
    state_err = "No such state: {state}"
    metro_err = "No Metro for county: {county}, {state}"
    county_err = "No such county: {county}, {state}"
    
    def __init__(self, county):
        """Parse ``county`` ("<county>, <state>") and look up all codes.

        Raises:
            IndexError: if the text has no comma, or if no row of
                ``county_codes.csv`` matches the county and state.
        """
        # Split on the comma alone and strip each part, so "philadelphia,pa"
        # and "philadelphia ,  pa" are accepted as well as "philadelphia, pa".
        county_arr = [part.strip() for part in county.split(",")]
        self.name = all_capitalize(county_arr[0], split_str='[ -]')
        self.raw_state = county_arr[1]
        self.short_state = self.get_state("short")
        self.long_state = self.get_state("long")
        self.code = self.get_county_code()
        self.metro_code = self.get_metro_code()
        
    def get_state(self, choice):
        """Return the state's abbreviation ("short") or full name ("long").

        A two-character ``raw_state`` is matched against the "Abbrev" column
        (upper-cased); anything else is matched against the "State" column
        after ``all_capitalize``. When nothing matches, a message is printed
        and None is returned.
        """
        abbrevs = pd.read_csv(County.a_path)
        if len(self.raw_state) == 2:
            row = abbrevs[abbrevs["Abbrev"] == self.raw_state.upper()]
        else:
            row = abbrevs[abbrevs["State"] == all_capitalize(self.raw_state)]
        if row.shape[0] == 0:
            print(County.state_err.format(state=self.raw_state))
            return None
        return row[County.a_head[choice]].values[0]

    def get_county_code(self):
        """Return the "County Code" for this county name and state.

        Raises:
            IndexError: with a descriptive message when no row matches.
        """
        county_codes = pd.read_csv(County.c_path)
        logic = (
            (county_codes["County Name"] == self.name) 
            & (county_codes["State"] == self.short_state)
        )
        matches = county_codes.loc[logic, "County Code"]
        if matches.empty:
            # Same exception type an empty `.values[0]` would raise, but with
            # a message that names the county that could not be found.
            raise IndexError(County.county_err.format(
                county=self.name,
                state=self.raw_state
            ))
        return matches.values[0]

    def get_metro_code(self):
        """Return the "Metro Code" of the first matching row, or None.

        A row matches when its "County Name" starts with ``self.name`` and its
        "State Name" equals ``self.long_state``. When nothing matches, a
        message is printed and None is returned.
        """
        metro_codes = pd.read_csv(County.m_path)
        logic = (
            # Prefix match; missing county names count as "no match" instead
            # of raising on a non-string cell.
            metro_codes["County Name"].str.startswith(self.name, na=False)
            & (metro_codes["State Name"] == self.long_state)
        )
        code_arr = metro_codes.loc[logic, "Metro Code"]
        if len(code_arr) != 0:
            return code_arr.values[0]
        print(County.metro_err.format(
            county=self.name, 
            state=self.long_state
        ))
        return None
    
    def get_code(self, choice):
        """Return the code for ``choice``: "county", "metro" or "state".

        "state" yields the state abbreviation. Any other value prints a
        message and returns None.
        """
        err_msg = "Not a valid data category: {cat}"
        code = None
        if choice == "county":
            code = self.code
        elif choice == "metro":
            code = self.metro_code
        elif choice == "state":
            code = self.short_state
        else:
            print(err_msg.format(cat=choice))
            
        return code
            
            
def can_today():
    """Return the UTC date to use as "today" for downloaded data.

    After 17:00 UTC this is the current UTC date; at or before 17:00 UTC it is
    the previous UTC date.
    NOTE(review): the 17:00 cutoff presumably tracks when the upstream data is
    refreshed — confirm.
    """
    utc_now = datetime.now(timezone.utc)
    cutoff = time(hour=17, tzinfo=timezone.utc)
    if utc_now.timetz() > cutoff:
        return utc_now.date()
    return utc_now.date() - timedelta(days=1)

def url_path(choice, code):
    """Return the API path for a category and region code.

    ``choice`` must be a key of ``url_dict``; otherwise a message is printed
    and None is returned. Numeric codes are zero-padded to a width of five
    before being substituted into ``url_f``; other codes are used as given.
    """
    if choice not in url_dict:
        print("Not a valid data category: {cat}".format(cat=choice))
        return None
    code_text = f'{code:05}' if isinstance(code, numbers.Number) else code
    return url_f.format(cat=url_dict[choice], code=str(code_text))

def raw_path(choice, code, day):
    """Return the path of the raw JSON file for a category, code and day.

    The path has the form ``raw/<choice>_<code>_<day>.json`` with the code
    lower-cased and the day in ISO format. If ``choice`` is not a key of
    ``url_dict`` a message is printed and None is returned.
    """
    if choice not in url_dict:
        print("Not a valid data category: {cat}".format(cat=choice))
        return None
    return "raw/{cat}_{code}_{day}.json".format(
        cat=choice,
        code=str(code).lower(),
        day=day.isoformat(),
    )
        
def raw_get(choice, code, day):
    """Make sure the raw JSON for ``day`` is on disk and return its path.

    If the file named by ``raw_path(choice, code, day)`` already exists it is
    reused. Otherwise it is downloaded from ``base_url + url_path(...)`` with
    ``data`` as the query parameters. In both cases, older files for the same
    category and code are deleted afterwards.

    Returns:
        The path of the raw file, or None when ``choice`` is not a key of
        ``url_dict`` or the server does not answer with status 200 (an error
        report is printed and no file is removed in the latter case).
    """
    raw_f = "raw/{file}"
    err_msg = (
        "Trouble with website: {error}\n"
        "Status code: {status}\n"
        "GET request url: {url}"
    )
    if choice not in url_dict:
        return None

    path = raw_path(choice, code, day)
    wanted = os.path.basename(path)
    # Files for this region are named "<choice>_<code>_<day>.json". Matching
    # the full prefix (including the trailing underscore) keeps a code such
    # as 42101 from also matching 421019.
    prefix = "{cat}_{code}_".format(cat=choice, code=str(code).lower())
    # List the directory on every call so that files written or removed
    # earlier in the same session are taken into account.
    stale = [
        name for name in os.listdir("raw")
        if name.startswith(prefix) and name != wanted
    ]

    if not os.path.isfile(path):
        full_url = base_url + url_path(choice, code)
        r = requests.get(full_url, params=data)
        if r.status_code != 200:
            print(err_msg.format(
                error=r.reason, 
                status=str(r.status_code), 
                url=full_url)
            )
            return None
        with open(path, "w") as raw_file:
            raw_file.write(r.text)

    # Only outdated files are removed; the file for `day` is never touched.
    for name in stale:
        os.remove(raw_f.format(file=name))
    return path
        
def clean_path(choice, code):
    """Return the path of the cleaned CSV for a category and region code.

    The path has the form ``clean/<choice>_<code>.csv`` with the code
    lower-cased. If ``choice`` is not a key of ``url_dict`` a message is
    printed and None is returned.
    """
    if choice not in url_dict:
        print("Not a valid data category: {cat}".format(cat=choice))
        return None
    return "clean/{cat}_{code}.csv".format(cat=choice, code=str(code).lower())

def clean_get(choice, code, min_day, today):
    """Build the cleaned CSV for one region and return its path.

    The raw JSON is obtained through ``raw_get(choice, code, today)``. Rows of
    "actualsTimeseries" and "metricsTimeseries" dated on or after ``min_day``
    are kept, gaps in the metrics are filled forward and then backward, the
    two tables are joined column-wise on their row index, and four integer
    "tick" columns plus their sum ("tickTotal") are added:

        tickCaseDensity         ceil(log2(caseDensity + 0.01))
        tickInfectionRate       floor(infectionRate * 20) - 16
        tickTestPositivityRatio floor(testPositivityRatio * 100 / 3)
        tickIcuCapacityRatio    max(0, floor(icuCapacityRatio * 20) - 13),
                                with a missing ratio treated as 0

    Args:
        choice: data category, a key of ``url_dict``.
        code: region code for that category.
        min_day: ``datetime.date``; earlier rows are dropped.
        today: ``datetime.date`` of the raw file to use.

    Returns:
        The path written (``clean_path(choice, code)``), or None when no raw
        file could be obtained.
    """
    source_path = raw_get(choice, code, today)
    if source_path is None:
        return None
    with open(source_path) as raw_file:
        raw_json = json.load(raw_file)
        
    new_path = clean_path(choice, code)

    def on_or_after(frame):
        # Boolean mask of the rows whose ISO "date" is >= min_day.
        return frame["date"].apply(lambda s: date.fromisoformat(s)) >= min_day

    def icu_tick(ratio):
        # A ratio that is still missing after filling (None or NaN) counts
        # as 0, which yields a tick of 0.
        filled = 0 if pd.isna(ratio) else ratio
        return max([0, math.floor(filled * 20) - 13])
    
    actuals = pd.DataFrame(raw_json["actualsTimeseries"])
    metrics = pd.DataFrame(raw_json["metricsTimeseries"])
    actuals = actuals.loc[
        on_or_after(actuals),
        [
            "date",
            "newCases",
            "vaccinationsInitiated",
            "vaccinationsCompleted"
        ]
    ]
    # ffill()/bfill() replace the deprecated fillna(method="pad"/"bfill").
    metrics = metrics.loc[
        on_or_after(metrics),
        [
            "caseDensity",
            "infectionRate",
            "testPositivityRatio",
            "icuCapacityRatio",
            "vaccinationsInitiatedRatio",
            "vaccinationsCompletedRatio"
        ]
    ].ffill().bfill()
    # NOTE(review): the two tables are paired by row index, not by date; this
    # assumes both time series list the same dates in the same order — confirm.
    clean = pd.concat([actuals, metrics], axis=1, join="inner")
    clean["tickCaseDensity"] = clean["caseDensity"].apply(
        lambda d: math.ceil(math.log2(d+0.01))
    )
    clean["tickInfectionRate"] = clean["infectionRate"].apply(
        lambda d: math.floor(d*20)-16
    )
    clean["tickTestPositivityRatio"] = clean["testPositivityRatio"].apply(
        lambda d: math.floor(d*100/3)
    )
    clean["tickIcuCapacityRatio"] = clean["icuCapacityRatio"].apply(icu_tick)
    clean["tickTotal"] = (
        clean["tickCaseDensity"] 
        + clean["tickInfectionRate"] 
        + clean["tickTestPositivityRatio"] 
        + clean["tickIcuCapacityRatio"]
    )
    clean.to_csv(new_path)
    return new_path

def county_info(county):
    """Print the resolved name, county code, metro code and state abbreviation.

    ``county`` is expected to expose ``name``, ``code``, ``metro_code`` and
    ``short_state``; the two codes are converted with ``str`` before printing.
    """
    report = (
        ("Provided County Name: ", county.name),
        ("County Code: ", str(county.code)),
        ("Metro Code: ", str(county.metro_code)),
        ("State Abbreviation: ", county.short_state),
    )
    for label, value in report:
        print(label + value)

def county_files(county, min_date):
    """Refresh the cleaned CSV files for a county, its metro and its state.

    ``clean_get`` is called for the "county" category, then for "metro" (only
    when the county has a metro code), then for "state", all with the day
    returned by ``can_today()``.

    Args:
        county: object exposing ``code``, ``metro_code`` and ``short_state``.
        min_date: earliest ``datetime.date`` to keep in the cleaned files.
    """
    tday = can_today()

    clean_get("county", county.code, min_date, tday)
    # A county without a metro area has metro_code None; requesting the
    # "metro" category for it would only produce a failed download.
    if county.metro_code is not None:
        clean_get("metro", county.metro_code, min_date, tday)
    clean_get("state", county.short_state, min_date, tday)
    
def one_tick_df(county, choice):
    """Load the tick totals of one category as a two-column frame.

    Reads the cleaned CSV located by ``clean_path(choice,
    county.get_code(choice))`` and returns its "date" column (converted with
    ``pd.to_datetime``) together with "tickTotal" renamed to ``choice``.
    """
    cleaned = pd.read_csv(clean_path(choice, county.get_code(choice)))
    return (
        cleaned.loc[:, ["date", "tickTotal"]]
        .assign(date=lambda frame: pd.to_datetime(frame["date"]))
        .rename(columns={"tickTotal": choice})
    )
    
def all_tick_df(county):
    """Combine the county, state and (when available) metro tick totals.

    The per-category frames from ``one_tick_df`` are merged on "date" with
    ``pd.merge``'s default join, in the order county, state, metro. The metro
    column is only included when ``county.metro_code`` is not None.
    """
    categories = ["county", "state"]
    if county.metro_code is not None:
        categories.append("metro")

    frames = [one_tick_df(county, category) for category in categories]
    merged = frames[0]
    for extra in frames[1:]:
        merged = pd.merge(merged, extra, on="date")
    return merged

def basic_chart(county):
    """Show a line chart of tick totals over time for a county.

    One line is drawn per column of ``all_tick_df(county)``: county (red),
    metro (blue, only when ``county.metro_code`` is not None) and state
    (green), in that order. The figure is rendered in the notebook.
    """
    all_ticks = all_tick_df(county)

    series = [("county", "County", "red")]
    if county.metro_code is not None:
        series.append(("metro", "Metro", "blue"))
    series.append(("state", "State", "green"))

    output_notebook()
    heading = "Ticks for " + county.name + ", " + county.short_state
    p = figure(
        title=heading,
        x_axis_label="date",
        y_axis_label="ticks",
        x_axis_type='datetime'
    )

    for column, label, color in series:
        p.line(
            all_ticks["date"],
            all_ticks[column],
            legend_label=label,
            line_color=color
        )
    show(p)
    
def average_chart(county):
    """Print the merged tick table with a rounded-up row mean added.

    The "mean" column is the mean of the tick columns of
    ``all_tick_df(county)`` (every column except "date"), rounded up with
    ``np.ceil``. The table is printed; nothing is returned.
    """
    all_ticks = all_tick_df(county)
    # Average the tick columns only: the datetime "date" column cannot take
    # part in a row-wise mean.
    tick_columns = all_ticks.drop(columns="date")
    all_ticks["mean"] = tick_columns.mean(axis=1).apply(np.ceil)
    print(all_ticks)
    
def info_and_chart(county, min_date):
    """Resolve a county string, print its codes, refresh files and chart it.

    Args:
        county: text in the form "<county>, <state>", passed to ``County``.
        min_date: earliest date to keep, passed on to ``county_files``.
    """
    resolved = County(county)
    county_info(resolved)
    county_files(resolved, min_date)
    basic_chart(resolved)

# Ask for a "<county>, <state>" string, then print its codes and chart its
# tick totals for dates on or after 2021-05-01.
county_str = input(
    "What county do you want covid data for? (Format: <county>, <state>) "
)
info_and_chart(county_str, date(2021,5,1))

What county do you want covid data for? (Format: <county>, <state>)  philadelphia, pa


Provided County Name: Philadelphia
County Code: 42101
Metro Code: 37980
State Abbreviation: PA


In [6]:
# Same prompt as above, but keeping dates on or after 2020-12-31.
county_str = input("What county do you want covid data for? (Format: <county>, <state>) ")
info_and_chart(county_str, date(2020,12,31))

What county do you want covid data for? (Format: <county>, <state>)  roanoke city, va


Provided County Name: Roanoke City
County Code: 51770
Metro Code: 40220
State Abbreviation: VA


In [29]:
# Scratch data: a single-column frame of 20 small integers, used by the
# rolling-window experiment in the following cell.
df = pd.DataFrame({'A': [3,4,4,4,5,4,3,4,5,4,6,7,8,6,7,8,7,6,7,8]})
df

Unnamed: 0,A
0,3
1,4
2,4
3,4
4,5
5,4
6,3
7,4
8,5
9,4


In [30]:
# Maximum of each row and the row before it (window of 2), minus one.
# The first row has no predecessor, so its result is NaN.
test = df.rolling(2).max()
test = test['A'] - 1
test

0     NaN
1     3.0
2     3.0
3     3.0
4     4.0
5     4.0
6     3.0
7     3.0
8     4.0
9     4.0
10    5.0
11    6.0
12    7.0
13    7.0
14    6.0
15    7.0
16    7.0
17    6.0
18    6.0
19    7.0
Name: A, dtype: float64

In [59]:
# Ask for a county and print its merged tick table with the rounded-up mean.
# average_chart() reads the cleaned CSV files already on disk; it does not
# download or rebuild them.
county_str = input("What county do you want covid data for? (Format: <county>, <state>) ")
c = County(county_str)
average_chart(c)

What county do you want covid data for? (Format: <county>, <state>)  philadelphia, pa


          date  county  state  metro  mean
0   2020-07-01      11     12      9  11.0
1   2020-07-02      12     12      9  11.0
2   2020-07-03      11     12      9  11.0
3   2020-07-04      11     12      9  11.0
4   2020-07-05      11     12      9  11.0
..         ...     ...    ...    ...   ...
477 2021-10-21      11     13      9  11.0
478 2021-10-22      11     12      9  11.0
479 2021-10-23      10     12      9  11.0
480 2021-10-24       9     11      9  10.0
481 2021-10-25       9     12      9  10.0

[482 rows x 5 columns]
