In [1]:
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt

In [2]:
# Load the raw dataset from the local CSV (path is relative to the working directory).
RAW_DATA_PATH = "data/owid-covid-data.csv"
covid_data = pd.read_csv(RAW_DATA_PATH)

In [3]:
# Keep only the rows whose location is the United Kingdom.
is_uk = covid_data["location"] == "United Kingdom"
UK_data = covid_data.loc[is_uk]

In [4]:
# Keep only rows with a strictly positive new-case count; rows where new_cases
# is NaN are dropped as well, because NaN > 0 evaluates to False.
# NOTE(review): the preview in the next cell shows rows 7 days apart, so this
# filter appears to select the reporting days — a reporting day with zero cases
# would also be removed; confirm that is acceptable.
has_new_cases = UK_data["new_cases"] > 0
UK_data = UK_data.loc[has_new_cases]

In [5]:
# Preview the first rows of the filtered UK frame.
UK_data.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
367843,GBR,Europe,United Kingdom,2020-02-02,1.0,1.0,0.143,1.0,1.0,0.143,...,24.7,,2.54,81.32,0.932,67508936.0,-1956.3,-2.68,-5.6,-29.172613
367850,GBR,Europe,United Kingdom,2020-02-09,21.0,20.0,2.857,2.0,1.0,0.143,...,24.7,,2.54,81.32,0.932,67508936.0,-2995.2002,-3.46,-7.66,-44.664833
367857,GBR,Europe,United Kingdom,2020-02-16,32.0,11.0,1.571,2.0,0.0,0.0,...,24.7,,2.54,81.32,0.932,67508936.0,-3845.7002,-3.85,-6.41,-57.347607
367864,GBR,Europe,United Kingdom,2020-02-23,35.0,3.0,0.429,2.0,0.0,0.0,...,24.7,,2.54,81.32,0.932,67508936.0,-4760.6,-4.21,-6.92,-70.990715
367871,GBR,Europe,United Kingdom,2020-03-01,68.0,33.0,4.714,3.0,1.0,0.143,...,24.7,,2.54,81.32,0.932,67508936.0,-5170.9,-4.11,-3.22,-77.10916


In [6]:
# Parse the date strings so they can be compared chronologically.
# .assign() returns a new frame instead of writing a column into the filtered
# slice produced by the earlier cells, which is what triggers pandas'
# SettingWithCopyWarning.
UK_data = UK_data.assign(date=pd.to_datetime(UK_data["date"]))

# Keep only the rows strictly inside the study window (both bounds are excluded).
window_start = datetime.datetime(2020, 8, 3)
window_end = datetime.datetime(2023, 2, 3)
UK_data = UK_data[(UK_data["date"] > window_start) & (UK_data["date"] < window_end)]

In [7]:
# Save the filtered, date-windowed UK rows to CSV (the DataFrame index is written as the first column).
# NOTE(review): the file name says "US" although the frame holds United Kingdom
# rows — likely a typo; confirm nothing else reads this path before renaming it.
UK_data.to_csv("data/US_covid_data.csv")

In [8]:
# Extract the new-case counts as a NumPy array and rescale them to units of 10**5 cases.
new_cases = UK_data["new_cases"].to_numpy()
case_data = new_cases / 10**5

In [9]:
# First-difference the scaled series, then drop the final difference.
# NOTE(review): the reason for discarding the last element is not evident from
# this notebook — confirm it is intentional.
# NOTE: this cell overwrites its own input, so running it twice differences the
# series twice; re-run the cell that defines case_data before repeating it.
case_data = np.diff(case_data)[:-1]

In [10]:
def construct_design(p, dat):
    """Build the lagged design matrix and response for an order-``p`` autoregression.

    Row ``t`` of ``X`` holds the ``p`` observations that precede ``y[t]``, most
    recent first: ``X[t, i] == dat[p + t - (i + 1)]`` and ``y[t] == dat[p + t]``.

    Parameters
    ----------
    p : int
        Number of lags (columns of ``X``). Must satisfy ``0 <= p <= len(dat)``.
    dat : array_like
        One-dimensional series of observations.

    Returns
    -------
    X : numpy.ndarray
        Float array of shape ``(len(dat) - p, p)``; column ``i`` is lag ``i + 1``.
    y : same type as ``dat[p:]``
        The observations that have a full set of ``p`` preceding values.

    Raises
    ------
    ValueError
        If ``p`` is negative or larger than ``len(dat)``.
    """
    if not 0 <= p <= len(dat):
        raise ValueError(f"p must be between 0 and len(dat)={len(dat)}, got {p}")

    values = np.asarray(dat)
    n = len(dat) - p
    X = np.zeros((n, p))
    y = dat[p:]
    for i in range(p):
        # Lag i + 1: the window of length n shifted back by i + 1 positions.
        # Slicing directly (instead of np.roll followed by [p:]) never wraps
        # values from the end of the series around to the front.
        start = p - (i + 1)
        X[:, i] = values[start:start + n]

    return X, y

In [11]:
# Build the two-lag design matrix and the response from the differenced series.
X, y = construct_design(p=2, dat=case_data)

In [12]:
# Display the response vector.
# NOTE(review): this prints the whole array; y[:5] would keep the output short.
y

array([ 1.12600e-02,  6.66900e-02,  7.11100e-02,  3.21600e-02,
        1.85430e-01,  2.97170e-01,  3.64040e-01,  9.56400e-02,
        3.14830e-01,  2.33100e-02,  8.36800e-02,  1.30200e-01,
       -3.41670e-01, -3.48530e-01, -1.07000e-02,  3.32980e-01,
        7.92240e-01,  2.53570e-01,  1.34649e+00,  4.49570e-01,
       -1.04794e+00, -7.17390e-01, -7.42670e-01, -4.09740e-01,
       -3.68870e-01, -1.38470e-01, -1.86830e-01, -1.99760e-01,
       -1.19200e-02, -2.15400e-02, -1.75000e-03, -1.08640e-01,
       -6.79300e-02, -2.99400e-02, -6.14000e-03, -1.68100e-02,
       -5.73000e-03,  1.04100e-02,  1.40900e-02,  5.46200e-02,
        1.13990e-01,  1.62000e-01,  1.70800e-01,  4.04190e-01,
        6.79980e-01,  5.01040e-01,  9.76940e-01, -6.61730e-01,
       -6.90810e-01,  5.88100e-02,  9.25600e-02,  2.95250e-01,
        1.21290e-01,  1.34050e-01, -1.20450e-01, -4.28820e-01,
        3.87710e-01, -6.77400e-02,  1.94420e-01,  5.03750e-01,
        2.43180e-01, -4.42780e-01, -3.36660e-01,  1.462

In [13]:
# Variance of the response; np.var defaults to ddof=0 (population variance).
np.var(y)

0.7446217034931971

In [20]:
# Prepend a column of ones (presumably the intercept term) to the lagged design matrix.
# NOTE: not idempotent — every re-run of this cell adds another column of ones to X.
X = np.column_stack((np.ones(len(y)), X))

In [21]:
# Display the design matrix after the column of ones was added.
# NOTE(review): this prints the whole array; X[:5] would keep the output short.
X

array([[ 1.00000e+00, -6.90000e-04,  1.37900e-02],
       [ 1.00000e+00,  1.12600e-02, -6.90000e-04],
       [ 1.00000e+00,  6.66900e-02,  1.12600e-02],
       [ 1.00000e+00,  7.11100e-02,  6.66900e-02],
       [ 1.00000e+00,  3.21600e-02,  7.11100e-02],
       [ 1.00000e+00,  1.85430e-01,  3.21600e-02],
       [ 1.00000e+00,  2.97170e-01,  1.85430e-01],
       [ 1.00000e+00,  3.64040e-01,  2.97170e-01],
       [ 1.00000e+00,  9.56400e-02,  3.64040e-01],
       [ 1.00000e+00,  3.14830e-01,  9.56400e-02],
       [ 1.00000e+00,  2.33100e-02,  3.14830e-01],
       [ 1.00000e+00,  8.36800e-02,  2.33100e-02],
       [ 1.00000e+00,  1.30200e-01,  8.36800e-02],
       [ 1.00000e+00, -3.41670e-01,  1.30200e-01],
       [ 1.00000e+00, -3.48530e-01, -3.41670e-01],
       [ 1.00000e+00, -1.07000e-02, -3.48530e-01],
       [ 1.00000e+00,  3.32980e-01, -1.07000e-02],
       [ 1.00000e+00,  7.92240e-01,  3.32980e-01],
       [ 1.00000e+00,  2.53570e-01,  7.92240e-01],
       [ 1.00000e+00,  1.34649e

In [22]:
# Store the design matrix and the response together in a single .npz archive.
# NOTE(review): this is written to the working directory rather than under data/
# like the other files in this notebook — confirm that is intended.
np.savez("covid_dat.npz", X= X, y=y)