In [1]:
import numpy as np
import pandas as pd
import json
from datetime import date
import math

In [2]:
# Reporting window. Both endpoints are counted, hence the extra 1 in num_days.
start, today = date(2020, 3, 16), date(2020, 11, 11)
num_days = 1 + (today - start).days

#### Confirmed Cases

In [3]:
# Load the confirmed-case table, keep only the three columns used later,
# reduce each timestamp to a plain calendar date and order the rows by date.
df = pd.read_csv("Covid-19.csv")[["Time Stamp", "Region", "Number of cases"]]
df["Time Stamp"] = pd.to_datetime(df["Time Stamp"]).dt.date
df = df.sort_values(by="Time Stamp")

In [4]:
# One group per region. g1 holds its own reference to this frame, so it stays
# valid after `df` is rebound to another table by the following cells.
g1 = df.groupby("Region")

#### Case Density

In [5]:
# Load the case-density table, keep only the three columns used later,
# reduce each timestamp to a plain calendar date and order the rows by date.
df = pd.read_csv("Covid-19-density.csv")[["Time Stamp", "Region", "Density"]]
df["Time Stamp"] = pd.to_datetime(df["Time Stamp"]).dt.date
df = df.sort_values(by="Time Stamp")

In [6]:
# One group per region over the density table. g2 holds its own reference to
# this frame, so it stays valid after `df` is rebound by the following cells.
g2 = df.groupby("Region")

#### Risk Score

In [7]:
# Load the R table, keep only the three columns used later,
# reduce each timestamp to a plain calendar date and order the rows by date.
df = pd.read_csv("Covid-19-R.csv")[["Time Stamp", "Region", "R"]]
df["Time Stamp"] = pd.to_datetime(df["Time Stamp"]).dt.date
df = df.sort_values(by="Time Stamp")

In [8]:
# One group per region over the R table.
g3 = df.groupby("Region")

#### Total Cases & Daily Cases

In [9]:
# Cumulative confirmed cases, one slot per day of the reporting window.
with open("total_cases.json", 'r') as f:
    reader = json.load(f)

total_cases = [0] * num_days
for day_key, record in reader.items():
    # record[1] is a count string; commas are stripped before converting to int.
    count = int(record[1].replace(",", ""))
    # NOTE(review): the 16 offset presumably maps key "16" to the first day
    # of the window -- confirm against whatever produces total_cases.json.
    total_cases[int(day_key) - 16] = count

# New cases per day: the first slot carries the opening cumulative value,
# every later slot is the difference from the previous day.
daily_cases = [total_cases[0]] + [
    curr - prev for prev, curr in zip(total_cases, total_cases[1:])
]

# Trailing 7-day mean of new cases (rounded to 1 decimal). For the first seven
# days the mean is taken over the days elapsed so far.
daily_cases_avg = [
    round(total_cases[d] / (d + 1), 1) if d < 7
    else round((total_cases[d] - total_cases[d - 7]) / 7, 1)
    for d in range(num_days)
]

#### Total Deaths & Daily Deaths

In [18]:
# Cumulative deaths, one slot per day of the reporting window.
with open("total_deaths.json", 'r') as f:
    reader = json.load(f)

total_deaths = [0] * num_days
for day_key, record in reader.items():
    # record[0][1] is a count string; commas are stripped before converting to int.
    count = int(record[0][1].replace(",", ""))
    # NOTE(review): the 16 offset presumably maps key "16" to the first day
    # of the window -- confirm against whatever produces total_deaths.json.
    total_deaths[int(day_key) - 16] = count

# New deaths per day: the first slot carries the opening cumulative value,
# every later slot is the difference from the previous day.
daily_deaths = [total_deaths[0]] + [
    curr - prev for prev, curr in zip(total_deaths, total_deaths[1:])
]

# Trailing 7-day mean of new deaths (rounded to 1 decimal). For the first seven
# days the mean is taken over the days elapsed so far.
daily_deaths_avg = [
    round(total_deaths[d] / (d + 1), 1) if d < 7
    else round((total_deaths[d] - total_deaths[d - 7]) / 7, 1)
    for d in range(num_days)
]

#### Construct Dict

In [19]:
# Per-region density series aligned to the reporting window: slot k holds the
# value (rounded to 5 decimals) for day `start + k`; days without a row stay 0.
region_density = {}
for region, rows in g2:
    series = [0] * num_days
    for stamp, density in zip(rows["Time Stamp"], rows["Density"]):
        series[(stamp - start).days] = round(density, 5)
    region_density[region] = series

In [20]:
# Per-region R series aligned to the reporting window: slot k holds the value
# (rounded to 3 decimals) for day `start + k`; days without a row stay 0.
region_R = {}
for region, rows in g3:
    series = [0] * num_days
    for stamp, r_value in zip(rows["Time Stamp"], rows["R"]):
        series[(stamp - start).days] = round(r_value, 3)
    region_R[region] = series

In [21]:
# Time-series payload: "MM-DD" labels for every day in the window, followed by
# the case and death series computed above (key order is the insertion order).
sequence = {
    "dates": [day.strftime("%m-%d") for day in pd.date_range(start, today)],
    "total_cases": total_cases,
    "daily_cases": daily_cases,
    "daily_cases_avg": daily_cases_avg,
    "total_deaths": total_deaths,
    "daily_deaths": daily_deaths,
    "daily_deaths_avg": daily_deaths_avg,
}

In [22]:
# Take the last row of every region group, rank those rows by value and keep
# the six largest (in ascending order). Each selected region is then mapped to
# its full series exactly as stored in the group (not padded to num_days).
latest_cases = g1.tail(1).sort_values(by="Number of cases")
top_cases = latest_cases.tail(6)["Region"].tolist()
sequence["top_cases"] = {
    region: g1.get_group(region)["Number of cases"].tolist()
    for region in top_cases
}

latest_density = g2.tail(1).sort_values(by="Density")
top_density = latest_density.tail(6)["Region"].tolist()
sequence["top_density"] = {
    region: g2.get_group(region)["Density"].tolist()
    for region in top_density
}

#### Generate JSON

In [23]:
# Write the per-region density series to disk. ensure_ascii=False emits
# non-ASCII characters verbatim, so the file encoding is pinned to UTF-8
# rather than left to the platform's default text encoding.
with open('region_density.json', 'w', encoding='utf-8') as f:
    json.dump(region_density, f, ensure_ascii = False)

In [24]:
# Write the per-region R series to disk. ensure_ascii=False emits non-ASCII
# characters verbatim, so the file encoding is pinned to UTF-8 rather than
# left to the platform's default text encoding.
with open('region_R.json', 'w', encoding='utf-8') as f:
    json.dump(region_R, f, ensure_ascii = False)

In [25]:
# Write the time-series payload to disk. Its top-region dictionaries are keyed
# by region name and ensure_ascii=False emits non-ASCII characters verbatim,
# so the file encoding is pinned to UTF-8 rather than the platform default.
with open('sequence.json', 'w', encoding='utf-8') as f:
    json.dump(sequence, f, ensure_ascii = False)