In [1]:
try:
  from bs4 import BeautifulSoup
except:
  !pip install beautifulsoup4
  from bs4 import BeautifulSoup
  
import requests
import numpy as np
from dateutil import parser
from datetime import datetime
import pandas as pd
from sys import platform

# GitHub repository the data files below are taken from.
repo = "https://github.com/CSSEGISandData/COVID-19"

# Raw time-series CSVs: confirmed cases (tsc), deaths (tsm) and recoveries
# (tsr). All three sit in the same directory, so the prefix is written once.
_ts_dir = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series"
tsc_csv = _ts_dir + "/time_series_19-covid-Confirmed.csv"
tsm_csv = _ts_dir + "/time_series_19-covid-Deaths.csv"
tsr_csv = _ts_dir + "/time_series_19-covid-Recovered.csv"

def day_tot(day):
  """Return the raw GitHub URL of the daily-report CSV for `day`.

  `day` must provide ``strftime``; the file name is the date written as
  MM-DD-YYYY followed by ``.csv``.
  """
  reports_dir = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"
  stamp = day.strftime('%m-%d-%Y')
  return reports_dir + "/" + stamp + ".csv"

# Let pandas print up to 200 rows before truncating a displayed frame.
pd.options.display.max_rows = 200

# Two-letter code -> full name for US states, DC and territories
# (also carries the 'NA' -> 'National' entry).
_us_regions = {
  'AK': 'Alaska',
  'AL': 'Alabama',
  'AR': 'Arkansas',
  'AS': 'American Samoa',
  'AZ': 'Arizona',
  'CA': 'California',
  'CO': 'Colorado',
  'CT': 'Connecticut',
  'DC': 'District of Columbia',
  'DE': 'Delaware',
  'FL': 'Florida',
  'GA': 'Georgia',
  'GU': 'Guam',
  'HI': 'Hawaii',
  'IA': 'Iowa',
  'ID': 'Idaho',
  'IL': 'Illinois',
  'IN': 'Indiana',
  'KS': 'Kansas',
  'KY': 'Kentucky',
  'LA': 'Louisiana',
  'MA': 'Massachusetts',
  'MD': 'Maryland',
  'ME': 'Maine',
  'MI': 'Michigan',
  'MN': 'Minnesota',
  'MO': 'Missouri',
  'MP': 'Northern Mariana Islands',
  'MS': 'Mississippi',
  'MT': 'Montana',
  'NA': 'National',
  'NC': 'North Carolina',
  'ND': 'North Dakota',
  'NE': 'Nebraska',
  'NH': 'New Hampshire',
  'NJ': 'New Jersey',
  'NM': 'New Mexico',
  'NV': 'Nevada',
  'NY': 'New York',
  'OH': 'Ohio',
  'OK': 'Oklahoma',
  'OR': 'Oregon',
  'PA': 'Pennsylvania',
  'PR': 'Puerto Rico',
  'RI': 'Rhode Island',
  'SC': 'South Carolina',
  'SD': 'South Dakota',
  'TN': 'Tennessee',
  'TX': 'Texas',
  'UT': 'Utah',
  'VA': 'Virginia',
  'VI': 'Virgin Islands',
  'VT': 'Vermont',
  'WA': 'Washington',
  'WI': 'Wisconsin',
  'WV': 'West Virginia',
  'WY': 'Wyoming',
}

# Two-letter code -> full name for Canadian provinces and territories.
_ca_regions = {
  'AB': 'Alberta',
  'BC': 'British Columbia',
  'MB': 'Manitoba',
  'NB': 'New Brunswick',
  'NL': 'Newfoundland and Labrador',
  'NT': 'Northwest Territories',
  'NS': 'Nova Scotia',
  'NU': 'Nunavut',
  'ON': 'Ontario',
  'PE': 'Prince Edward Island',
  'QC': 'Quebec',
  'SK': 'Saskatchewan',
  'YT': 'Yukon',
}

# Combined lookup; the US entries come first, then the Canadian ones.
states = {**_us_regions, **_ca_regions}

def cdate(date):
  """Format `date` as an un-padded ``M/D/YY`` string, e.g. ``1/5/20``.

  `date` must expose ``month``, ``day`` and ``strftime`` (a ``datetime`` or
  ``date``). Month and day carry no leading zero; the year is two digits.

  The string is assembled from the numeric fields rather than with the
  ``%-m`` / ``%#m`` strftime flags, which are platform-specific extensions,
  so the result is the same on every operating system.
  """
  return f"{date.month}/{date.day}/{date.strftime('%y')}"

def datecols(df):
  """Return, in column order, the labels of `df.columns` that is_date accepts."""
  return list(filter(is_date, df.columns))

def update_firsts(df, firsts_col):
  """Record, for every row of `df`, the earliest date with a positive value.

  The column `firsts_col` is created (or reset) on `df` in place. For each
  row, the date-like columns (as judged by is_date) are scanned in
  chronological order and the first one whose value is > 0 is written to
  `firsts_col` as ``YYYY-MM-DD``. Rows with no positive value keep ``''``.

  Cells that cannot be compared with 0 are reported with ``print`` and
  skipped, so one bad cell does not abort the whole pass. Returns None.
  """
  df[firsts_col] = ''
  # Keep each parsed date together with the label it was parsed from, so the
  # row lookup uses the real header instead of a re-formatted date that might
  # not match the header text.
  dated_cols = sorted(
    ((parser.parse(col), col) for col in df.columns if is_date(col)),
    key=lambda pair: pair[0],
  )
  for i, row in df.iterrows():
    for date, col in dated_cols:
      try:
        if row[col] > 0:
          df.at[i, firsts_col] = date.strftime('%Y-%m-%d')
          break  # only the earliest positive date is wanted
      except Exception as e:
        # Best effort: report the offending cell and keep scanning this row.
        print(f"### ERROR update_firsts(df, {firsts_col}):\n", col, row, e)

def is_date(text):
  """Return True if ``parser.parse(text)`` succeeds, False otherwise.

  Any ordinary exception raised while parsing (including the one raised for
  non-string input) counts as "not a date".
  """
  try:
    parser.parse(text)
  except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt
    # and SystemExit still propagate instead of being reported as False.
    return False
  return True

def hotten(val):
    """
    Map a percentage string such as ``'12%'`` to a CSS color declaration.

    The red channel is ``percent * 10 + 56`` clamped to the range 0..255;
    green and blue are fixed at 0x55, so larger percentages give a stronger
    red. The channel is always written as two upper-case hex digits, which
    keeps the color valid for negative percentages as well.

    Returns a string of the form ``'color: #RR5555;'``.
    Raises ValueError if the text left after removing ``%`` is not an integer.
    """
    percent = int(val.replace('%', ''))
    # Clamp on both sides: without the lower bound a negative input yields a
    # one-digit or sign-stripped hex value and therefore a malformed color.
    red = max(0, min(percent * 10 + 56, 255))
    return f'color: #{red:02X}5555;'

In [2]:
# Build the summary table `df` from the confirmed-cases time series: one row
# per (Country, State) with the death toll, the time elapsed since the first
# death and the first confirmed case, and for every date the case count plus
# its percent change against the preceding date column.

def _aging(first_dates, now):
  """Return 'N days' strings for the time from each 'YYYY-MM-DD' date to `now`.

  Entries that are blank or missing come back as ''.
  """
  elapsed = now - pd.to_datetime(first_dates, format='%Y-%m-%d', errors='coerce')
  # str(Timedelta) looks like '9 days 03:04:05...'; keep the first two tokens.
  return elapsed.apply(lambda delta: ' '.join(str(delta).replace('NaT', '').split(' ')[:2]))

df = pd.read_csv(tsc_csv).drop(columns=['Lat', 'Long'])
dates = datecols(df)
today_col = dates[-1]  # last date column in file order
dates.reverse()        # from here on `dates` runs from the last column to the first

dfr = pd.read_csv(tsr_csv)  # loaded here but not used further in this cell
dfm = pd.read_csv(tsm_csv).drop(columns=['Lat', 'Long'])

column_names = {'Country/Region': 'Country', 'Province/State': 'State'}
country_aliases = {'Mainland China': 'China', 'United Arab Emirates': 'UAE'}
for frame in (df, dfm):
  frame.rename(columns=column_names, inplace=True)
  # Assign the result back: an in-place replace on `frame['Country']` acts on
  # a temporary column object and is not guaranteed to change the frame.
  frame['Country'] = frame['Country'].replace(country_aliases)
mdates = datecols(dfm)

update_firsts(df, 'First Confirmed')
update_firsts(dfm, 'First Death')

# No blanket fillna('') on the frames: the date columns have to stay numeric
# for the integer casts below, so missing values are filled column by column.
dfm.set_index(['Country', 'State'], inplace=True)
df.set_index(['Country', 'State'], inplace=True)

df['Death Toll'] = dfm[today_col].fillna(0).astype(int)
df['First Death'] = dfm['First Death'].fillna('')
df['Death Aging'] = _aging(df['First Death'], datetime.now())
df.drop(columns=['First Death'], inplace=True)

df['Confirmed Aging'] = _aging(df['First Confirmed'], datetime.now())
df.drop(columns=['First Confirmed'], inplace=True)

percents = []    # names of the generated '<Mon><DD>%' columns, newest first
drop_dates = []
rev_dates = sorted([parser.parse(date) for date in dates], reverse=True)
for i, parsed in enumerate(rev_dates):
  date = cdate(parsed)  # header text of this day's column
  col = parsed.strftime('%B')[:3] + parsed.strftime('%d')

  df[col] = df[date].replace(np.inf, 0).fillna(0).astype(int)
  if i < len(dates) - 1:
    pcol = dates[i + 1]  # the preceding date column
    pct_col = col + '%'
    percents.append(pct_col)
    growth = (df[date] / df[pcol].fillna(0) * 100) - 100
    # Division by a zero count gives inf/NaN; both are shown as 0%.
    pct_val = round(growth).replace(np.inf, 0).fillna(0).astype(int).astype(str) + '%'
    drop_dates.append(date)
    df[pct_col] = pct_val
df.drop(columns=dates, inplace=True)

In [4]:
# Display `df` as a styled table: a row's background turns black on hover and
# each percent column takes its text color from hotten().
# df.set_index('Country', inplace=True)
hover_rule = {
    'selector': 'tr:hover',
    'props': [('background-color', 'black')],
}
styled = df.style.set_table_styles([hover_rule])
styled.applymap(hotten, subset=percents)

Unnamed: 0_level_0,Unnamed: 1_level_0,Death Toll,Death Aging,Confirmed Aging,Feb18,Feb18%,Feb17,Feb17%,Feb16,Feb16%,Feb15,Feb15%,Feb14,Feb14%,Feb13,Feb13%,Feb12,Feb12%,Feb11,Feb11%,Feb10,Feb10%,Feb09,Feb09%,Feb08,Feb08%,Feb07,Feb07%,Feb06,Feb06%,Feb05,Feb05%,Feb04,Feb04%,Feb03,Feb03%,Feb02,Feb02%,Feb01,Feb01%,Jan31,Jan31%,Jan30,Jan30%,Jan29,Jan29%,Jan28,Jan28%,Jan27,Jan27%,Jan26,Jan26%,Jan25,Jan25%,Jan24,Jan24%,Jan23,Jan23%,Jan22
Country,State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
China,Anhui,6,9 days,27 days,982,1%,973,1%,962,1%,950,2%,934,3%,910,2%,889,3%,860,4%,830,7%,779,6%,733,10%,665,13%,591,12%,530,10%,480,18%,408,20%,340,14%,297,25%,237,18%,200,32%,152,43%,106,51%,70,17%,60,54%,39,160%,15,67%,9,800%,1
China,Beijing,4,22 days,27 days,387,2%,381,0%,380,1%,375,1%,372,2%,366,4%,352,3%,342,1%,337,3%,326,3%,315,6%,297,8%,274,8%,253,11%,228,8%,212,11%,191,14%,168,21%,139,22%,114,3%,111,22%,91,14%,80,18%,68,66%,41,14%,36,64%,22,57%,14
China,Chongqing,5,17 days,27 days,555,0%,553,0%,551,1%,544,1%,537,2%,529,2%,518,3%,505,4%,486,4%,468,9%,428,0%,426,4%,411,6%,389,6%,366,9%,337,12%,300,21%,247,17%,211,16%,182,24%,147,11%,132,20%,110,47%,75,32%,57,111%,27,200%,9,50%,6
China,Fujian,0,,27 days,292,1%,290,1%,287,1%,285,1%,281,1%,279,3%,272,2%,267,2%,261,4%,250,5%,239,7%,224,4%,215,5%,205,6%,194,8%,179,13%,159,10%,144,20%,120,19%,101,20%,84,5%,80,36%,59,69%,35,94%,18,80%,10,100%,5,400%,1
China,Gansu,2,10 days,26 days,91,0%,91,1%,90,0%,90,0%,90,0%,90,3%,87,1%,86,4%,83,0%,83,5%,79,18%,67,8%,62,0%,62,9%,57,4%,55,8%,51,27%,40,38%,29,12%,26,8%,24,26%,19,36%,14,100%,7,75%,4,100%,2,0%,2,0%,0
China,Guangdong,4,11 days,27 days,1328,0%,1322,0%,1316,2%,1294,3%,1261,2%,1241,2%,1219,4%,1177,2%,1159,2%,1131,3%,1095,6%,1034,7%,970,8%,895,10%,813,12%,725,15%,632,18%,535,23%,436,23%,354,28%,277,34%,207,37%,151,36%,111,42%,78,47%,53,66%,32,23%,26
China,Guangxi,2,9 days,27 days,242,2%,238,0%,237,1%,235,4%,226,2%,222,0%,222,3%,215,2%,210,8%,195,7%,183,6%,172,2%,168,12%,150,8%,139,9%,127,14%,111,11%,100,15%,87,12%,78,34%,58,14%,51,11%,46,28%,36,57%,23,0%,23,360%,5,150%,2
China,Guizhou,2,13 days,27 days,146,0%,146,1%,144,1%,143,2%,140,4%,135,2%,133,5%,127,17%,109,10%,99,11%,89,10%,81,14%,71,11%,64,10%,58,26%,46,21%,38,31%,29,0%,29,142%,12,33%,9,0%,9,29%,7,40%,5,25%,4,33%,3,0%,3,200%,1
China,Hainan,4,22 days,27 days,163,0%,163,1%,162,0%,162,2%,159,1%,157,0%,157,9%,144,4%,138,5%,131,6%,124,6%,117,10%,106,7%,99,24%,80,11%,72,12%,64,3%,62,19%,52,13%,46,7%,43,8%,40,21%,33,50%,22,16%,19,138%,8,60%,5,25%,4
China,Hebei,4,26 days,27 days,306,2%,301,0%,300,3%,291,3%,283,7%,265,6%,251,5%,239,10%,218,6%,206,6%,195,13%,172,10%,157,16%,135,7%,126,12%,113,9%,104,8%,96,17%,82,26%,65,35%,48,45%,33,83%,18,38%,13,62%,8,300%,2,100%,1,0%,1
