## Importing Libraries & Loading Data

In [12]:
import pandas as pd
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [None]:
# Location of the confirmed-cases time-series CSV (path is relative to the
# working directory), followed by the DataFrame parsed from it.
case_file = "time_series_covid19_confirmed_global.csv"
case_data = pd.read_csv(case_file)

In [None]:
# Notebook display cell: show the first four rows of the case table.
case_data.head(n=4)

## Question 1

In [None]:
# Number of leading rows of the case table used throughout the analysis.
N = 4

# Work from a single slice of the first N rows.
selected_rows = case_data.iloc[:N]

# Labels for the selected rows (kept as a Series indexed like case_data).
names = selected_rows["Country/Region"]

# Everything except the four identifier/coordinate columns, as a 2-D array
# with one row per selected table row.
# NOTE(review): presumably each remaining column is one date of cumulative
# counts - confirm against the CSV header.
raw_cases = np.array(
    selected_rows.drop(columns=["Province/State", "Country/Region", "Lat", "Long"])
)

In [None]:
# Day-over-day change for every row of raw_cases: column j holds
# raw_cases[:, j + 1] - raw_cases[:, j], so the result has shape
# (rows of raw_cases, columns of raw_cases - 1).
# np.diff replaces the hand-rolled "allocate zeros, reshape, loop over rows"
# version; the float cast keeps the dtype that the np.zeros buffer used to
# give, so downstream cells see the same values and type.
nc_counts = np.diff(raw_cases, axis=1).astype(float)

In [None]:
# Index of the first nonzero entry in each row of nc_counts. A row with no
# nonzero entry makes .min() raise ValueError on the empty index array.
first_nonzero = [np.nonzero(count)[0].min() for count in nc_counts]

# Each series trimmed so that it starts at its first nonzero entry.
cases = [count[start:] for count, start in zip(nc_counts, first_nonzero)]

In [None]:
# One stacked panel per selected row. The panel count follows N instead of a
# literal 4 so the figure stays in step with the loops over N, and
# squeeze=False keeps `axs` an array even when N == 1. The figure height
# scales with N and equals the previous (15, 4) when N is 4.
fig, axs = plt.subplots(N, figsize=(15, N), squeeze=False)
axs = axs[:, 0]  # single column of panels -> 1-D array of Axes
fig.suptitle("New Covid-19 Cases Time Series By Country")
# The x-axis label goes on the bottom panel only.
axs[-1].set_xlabel("Days Since First Nonzero Case Day")
for ax, name, daily in zip(axs, names, cases):
    # x is the position within the trimmed series (0 = first nonzero day).
    ax.plot(np.arange(daily.size), daily)
    ax.set_ylabel(f"{name}")

## Question 2

In [None]:
lowess = sm.nonparametric.lowess

# Lowess-smoothed version of each trimmed series, fitted against the series
# position (0, 1, 2, ...). return_sorted=False makes lowess return only the
# fitted values. The list is built directly, so no placeholder arrays are
# allocated and then thrown away.
smooth_cases = [
    lowess(daily, np.arange(daily.size), frac=0.05, return_sorted=False)
    for daily in cases
]

# One stacked panel per selected row; the count follows N rather than a
# literal 4, and squeeze=False keeps `axs` an array even when N == 1.
fig, axs = plt.subplots(N, figsize=(15, N), squeeze=False)
axs = axs[:, 0]  # single column of panels -> 1-D array of Axes
fig.suptitle("Covid-19 Cases Time Series Vs Smoothed (Lowess) Version")
# The x-axis label goes on the bottom panel only.
axs[-1].set_xlabel("Days Since First Nonzero Case Day")
for ax, name, daily, smoothed in zip(axs, names, cases, smooth_cases):
    ax.plot(daily)
    ax.plot(smoothed)
    ax.set_ylabel(f"{name}")
    # Legend entries follow the order of the two plot calls above.
    ax.legend(["Raw", "Smoothed"])

## Question 3

In [None]:
# One stacked panel per selected row; the count follows N rather than a
# literal 4, and squeeze=False keeps `axs` an array even when N == 1.
fig, axs = plt.subplots(N, figsize=(15, N), squeeze=False)
axs = axs[:, 0]  # single column of panels -> 1-D array of Axes
fig.suptitle("Covid-19 Case Data Residuals (Observed - Smoothed)")
# The x-axis label goes on the bottom panel only.
axs[-1].set_xlabel("Days Since First Nonzero Case Day")
for ax, name, observed, smoothed in zip(axs, names, cases, smooth_cases):
    # Residual = observed series minus its smoothed counterpart.
    res = observed - smoothed
    ax.plot(res)
    ax.set_ylabel(f"{name}")

## Question 4

In [None]:
# Location of the deaths time-series CSV (path is relative to the working
# directory), followed by the DataFrame parsed from it.
death_file = "time_series_covid19_deaths_global.csv"
death_data = pd.read_csv(death_file)

In [None]:
# Notebook display cell: show the first four rows of the deaths table.
death_data.head(n=4)

In [None]:
# Sum the last column of each table over all rows, then take deaths / cases.
# NOTE(review): this assumes the last column of both files refers to the same
# date - confirm the two CSVs were downloaded together.
total_deaths = death_data.iloc[:, -1].to_numpy().sum()
total_cases = case_data.iloc[:, -1].to_numpy().sum()
tot_fatal = total_deaths / total_cases
print(f'The overall case fatality rate is {tot_fatal:.4f}, i.e. {tot_fatal * 100:.2f}%.')

## Question 5

In [None]:
# Index of the first nonzero entry in each row of raw_cases. A row with no
# nonzero entry makes .min() raise ValueError on the empty index array.
first_case_day = [np.nonzero(row)[0].min() for row in raw_cases]

# Each raw_cases row trimmed so that it starts at its first nonzero entry.
cum_cases = [row[start:] for row, start in zip(raw_cases, first_case_day)]

In [None]:
# Deaths table restricted to the first N rows, minus the four
# identifier/coordinate columns, as a 2-D array.
death_rows = death_data.iloc[:N]
raw_deaths = np.array(
    death_rows.drop(columns=["Province/State", "Country/Region", "Lat", "Long"])
)

# Number of leading entries that were trimmed from each raw_cases row when
# cum_cases was built; the same number is dropped from the matching deaths row
# so both series start at the same position.
lead_days = [raw_cases[i].size - cum_cases[i].size for i in range(N)]
cum_deaths = [raw_deaths[i][lead_days[i]:] for i in range(N)]

# Element-wise ratio of trimmed deaths to trimmed cases for each row.
fatal_est = [deaths / confirmed for deaths, confirmed in zip(cum_deaths, cum_cases)]

In [None]:
# One stacked panel per selected row; the count follows N rather than a
# literal 4, and squeeze=False keeps `axs` an array even when N == 1.
fig, axs = plt.subplots(N, figsize=(15, N), squeeze=False)
axs = axs[:, 0]  # single column of panels -> 1-D array of Axes
fig.suptitle("Covid-19 Case Fatality Rate Over Time")
# The x-axis label goes on the bottom panel only.
axs[-1].set_xlabel("Days Since First Nonzero Case Day")
for ax, name, rate in zip(axs, names, fatal_est):
    ax.plot(rate)
    ax.set_ylabel(f"{name}")