In [None]:
# Importing libraries/functions the project needs
import urllib
from datetime import date
from datetime import timedelta
from datetime import datetime
import numpy as np
import pandas as pd
import pymysql
from sqlalchemy import create_engine

In [None]:
def create_daily_report(max_lookback_days=30):
    """Download the most recent available COVID-19 daily report.

    Starting from today's date (as given by ``date.today()``), the function
    requests the daily-report CSV for that date from the
    CSSEGISandData/COVID-19 GitHub repository. When the request fails with an
    HTTP error, it steps back one day and tries again.

    Parameters
    ----------
    max_lookback_days : int, optional
        How many days before today may be tried in addition to today itself.
        The bound guarantees termination when no report can be found at all
        (for example if the files are moved), instead of looping forever.

    Returns
    -------
    pandas.DataFrame
        The contents of the first daily-report CSV that could be downloaded.

    Raises
    ------
    RuntimeError
        If no report is available for today or any of the previous
        ``max_lookback_days`` days. The last ``HTTPError`` is attached as the
        cause.
    """
    # Imported explicitly: a bare ``import urllib`` does not by itself
    # guarantee that the ``urllib.error`` submodule has been loaded.
    import urllib.error

    url_template = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv"
    one_day = timedelta(days=1)
    report_date = date.today()
    last_error = None

    # Count down the remaining fallbacks so the loop is always bounded.
    for retries_left in range(max_lookback_days, -1, -1):
        date_str = report_date.strftime('%m-%d-%Y')
        try:
            daily_report = pd.read_csv(url_template.format(date_str))
        except urllib.error.HTTPError as err:
            last_error = err
            print("Daily report for {} is still unavailable.".format(date_str))
            if retries_left:
                report_date -= one_day
                print("Now we'll try {}.".format(report_date.strftime('%m-%d-%Y')))
        else:
            print("Daily report for {} is available.".format(date_str))
            return daily_report

    raise RuntimeError(
        "No daily report found for today or the previous {} day(s).".format(max_lookback_days)
    ) from last_error

In [None]:
def create_time_series_global():
    """Build a long-format global time series with daily increases.

    Downloads the 'confirmed', 'deaths' and 'recovered' global time-series
    CSVs from the CSSEGISandData/COVID-19 GitHub repository, melts each one
    from one-column-per-date into one-row-per-date, and stacks them.

    Returns
    -------
    pandas.DataFrame
        Columns: ``CaseType``, ``Province/State``, ``Country/Region``,
        ``Lat``, ``Long``, ``Date`` (datetime), ``Case`` and
        ``Daily_Increase``. Missing ``Province/State`` values are replaced by
        the row's ``Country/Region``. Rows are sorted by
        Province/State, Country/Region, CaseType and Date with a fresh
        0..n-1 index.
    """
    case_types = ['confirmed', 'deaths', 'recovered']
    id_vars = ['Province/State', 'Country/Region', 'Lat', 'Long']
    url_template = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{}_global.csv"

    long_frames = []
    for case_type in case_types:
        wide = pd.read_csv(url_template.format(case_type))
        long_frame = pd.melt(wide, id_vars=id_vars, var_name='Date', value_name='Case')
        long_frame.insert(0, 'CaseType', case_type)
        long_frames.append(long_frame)

    # DataFrame.append was removed in pandas 2.0; concatenating once is also
    # cheaper than re-copying the accumulated frame on every iteration.
    time_series_global = pd.concat(long_frames, ignore_index=True)
    time_series_global['Date'] = pd.to_datetime(time_series_global['Date'])

    # Rows without a province fall back to the country name, so that every
    # row has a usable Province/State key for sorting and grouping below.
    province = time_series_global['Province/State'].to_numpy()
    country = time_series_global['Country/Region'].to_numpy()
    time_series_global['Province/State'] = np.where(pd.isna(province), country, province)

    time_series_global = time_series_global.sort_values(
        ['Province/State', 'Country/Region', 'CaseType', 'Date']
    ).reset_index(drop=True)

    # Previous day's count within each series; the first day of a series has
    # no predecessor and is treated as 0, so its increase equals its count.
    previous_case = (
        time_series_global
        .groupby(['CaseType', 'Country/Region', 'Province/State'])['Case']
        .shift(1)
        .fillna(0)
    )
    time_series_global['Daily_Increase'] = time_series_global['Case'] - previous_case
    return time_series_global

In [None]:
def create_time_series_US():
    """Build a long-format US time series with daily increases.

    Downloads the 'confirmed' and 'deaths' US time-series CSVs from the
    CSSEGISandData/COVID-19 GitHub repository, melts each one from
    one-column-per-date into one-row-per-date, and stacks them.

    Returns
    -------
    pandas.DataFrame
        Columns: ``CaseType``, the identifier columns (``UID`` ...
        ``Combined_Key``), ``Date`` (datetime), ``Case`` and
        ``Daily_Increase``. Rows are sorted by Admin2, Province_State,
        CaseType and Date with a fresh 0..n-1 index.
    """
    case_types = ['confirmed', 'deaths']
    id_vars = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
               'Country_Region', 'Lat', 'Long_', 'Combined_Key']
    url_template = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{}_US.csv"

    long_frames = []
    for case_type in case_types:
        wide = pd.read_csv(url_template.format(case_type))
        # 'Population' is not a date column and must not be melted. Dropping
        # it whenever present (and tolerating its absence) keeps both files
        # on the same set of columns.
        wide = wide.drop(columns='Population', errors='ignore')
        long_frame = pd.melt(wide, id_vars=id_vars, var_name='Date', value_name='Case')
        long_frame.insert(0, 'CaseType', case_type)
        long_frames.append(long_frame)

    # DataFrame.append was removed in pandas 2.0; concatenating once is also
    # cheaper than re-copying the accumulated frame on every iteration.
    time_series_US = pd.concat(long_frames, ignore_index=True)
    time_series_US['Date'] = pd.to_datetime(time_series_US['Date'])
    time_series_US = time_series_US.sort_values(
        ['Admin2', 'Province_State', 'CaseType', 'Date']
    ).reset_index(drop=True)

    # Previous day's count within each (CaseType, Combined_Key) series; the
    # first day has no predecessor and is treated as 0.
    previous_case = (
        time_series_US
        .groupby(['CaseType', 'Combined_Key'])['Case']
        .shift(1)
        .fillna(0)
    )
    time_series_US['Daily_Increase'] = time_series_US['Case'] - previous_case
    return time_series_US

In [None]:
# UID_ISO_FIPS_LookUp_Table
# Read the UID/ISO/FIPS lookup CSV from the CSSEGISandData/COVID-19 GitHub
# repository into a DataFrame. This runs when the cell executes and needs
# network access; `csv_url` is left defined at notebook scope afterwards.
csv_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
UID_ISO_FIPS_LookUp_Table = pd.read_csv(csv_url)

In [None]:
# Build the three datasets at notebook scope by calling the loader functions.
# Each call downloads CSV files over the network; create_daily_report() also
# prints which report dates it tried.
daily_report = create_daily_report()
time_series_global = create_time_series_global()
time_series_US = create_time_series_US()

Daily report for 07-26-2020 is still unavailable.
Now we'll try 07-25-2020.
Daily report for 07-25-2020 is available.
