In [18]:
import pandas as pd
import numpy as np
import torch
# Sanity check: display whether PyTorch can see a CUDA device from this kernel.
torch.cuda.is_available()

True

In [19]:
# Pick the compute device once: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f'Device: {device}')

Device: cuda:0


In [20]:
ST_PATH = '../Datasets/stationary.csv'

# stationary_data is a pair of parallel lists: (country codes, feature vectors).
stationary_data = [], []
st_codes, st_features = stationary_data

with open(ST_PATH, 'r') as f:
    next(f)  # skip the first line of the file (treated as a header)

    for line in f:
        # First field is the (possibly double-quoted) code; every remaining
        # field is parsed as a float feature.
        raw_code, *raw_values = line.strip().split(',')

        st_codes.append(raw_code.replace('"', ''))
        st_features.append(np.array(list(map(float, raw_values)), dtype=np.float32))

In [21]:
from sklearn.preprocessing import StandardScaler

# Standardise the per-country feature vectors, then re-key them by country
# code. NOTE: `stationary_data` changes from a (codes, vectors) tuple to a
# {code: float32 vector} dict here, so this cell cannot be re-run on its own
# without first re-running the loading cell.
stationary_X = StandardScaler().fit_transform(stationary_data[1])
scaled_rows = np.array(stationary_X, dtype=np.float32)
stationary_data = dict(zip(stationary_data[0], scaled_rows))

In [22]:
# Location of the time-varying table (one row per country per date, judging by
# the displayed head below).
TS_PATH = '../Datasets/time_variable.csv'

# Load the table, parsing the `date` column as datetimes.
df_ts = pd.read_csv(TS_PATH, parse_dates=['date'])
# Preview the first rows.
df_ts.head()

Unnamed: 0,date,confirmed,recovered,deaths,country_code,StringencyIndex,StringencyIndexForDisplay,cum_cases,cum_recovered,cum_deaths,since_first
0,2020-02-24,1,0,0,AFG,0.0,0.0,1,0,0,0
1,2020-02-25,0,0,0,AFG,0.0,0.0,1,0,0,1
2,2020-02-26,0,0,0,AFG,0.0,0.0,1,0,0,2
3,2020-02-27,0,0,0,AFG,0.0,0.0,1,0,0,3
4,2020-02-28,0,0,0,AFG,0.0,0.0,1,0,0,4


In [23]:
ts_grouped = df_ts.groupby('country_code')

# Map each country code to a float32 matrix of its time series: one row per
# record, one column per remaining feature after removing the non-numeric
# `date` and `country_code` columns.
# `columns=` is used instead of a positional axis argument: passing `axis`
# positionally to DataFrame.drop was deprecated in pandas 1.3 and raises a
# TypeError from pandas 2.0 on.
ts_data = {
    name: group.drop(columns=['date', 'country_code']).to_numpy(dtype=np.float32)
    for name, group in ts_grouped
}

# Number of countries with a time series.
len(ts_data)

196

In [30]:
ts_dataset = {}

# Number of consecutive rows fed to the model as one input window.
WINDOW = 1

for code, array in ts_data.items():
    window_starts = range(len(array) - WINDOW)

    # Each input is WINDOW consecutive rows, transposed to (features, WINDOW).
    X = [array[s:s + WINDOW].T for s in window_starts]
    # Each target is columns 0-4 of the row immediately after the window
    # (presumably confirmed, recovered, deaths and the two stringency columns,
    # given the column order shown for df_ts -- confirm).
    Y = [array[s + WINDOW, [0, 1, 2, 3, 4]] for s in window_starts]

    ts_dataset[code] = (X, Y)

# Spot check: the last input window next to the second-to-last target.
ts_dataset['HRV'][0][-1].T, ts_dataset['HRV'][1][-2]

(array([[  50.   ,   56.   ,    2.   ,   86.395,  100.   , 1791.   ,
          529.   ,   35.   ,   51.   ]], dtype=float32),
 array([ 50.   ,  56.   ,   2.   ,  86.395, 100.   ], dtype=float32))

In [31]:
from torch.utils.data import Dataset
from collections import namedtuple

# One training example: the country code, an input window and its target.
Example = namedtuple('Example', ['code', 'x', 'y'])


class CovidData(Dataset):
    """Flat dataset of (code, x, stationary features, y) samples.

    Args:
        ts: mapping ``code -> (X, Y)`` where ``X`` and ``Y`` are parallel
            sequences of inputs and targets for that code.
        st: mapping ``code -> stationary feature vector``.
        transform: optional callable applied to the 4-tuple sample before it
            is returned from ``__getitem__``.
    """

    def __init__(self, ts, st, transform=None):
        self.st = st
        self.transform = transform

        # Flatten every per-code (X, Y) pair into a single list of examples,
        # preserving the iteration order of `ts`.
        self.data = [
            Example(country, window, target)
            for country, (windows, targets) in ts.items()
            for window, target in zip(windows, targets)
        ]

    def __getitem__(self, idx):
        """Return ``(code, x, st[code], y)`` for ``idx``, transformed if requested."""
        country, window, target = self.data[idx]
        sample = (country, window, self.st[country], target)

        if not self.transform:
            return sample
        return self.transform(sample)

    def __len__(self):
        """Total number of examples across all codes."""
        return len(self.data)
    
# Build the dataset from the windowed time series and the standardised
# stationary features.
train_data = CovidData(ts=ts_dataset, st=stationary_data)

# Inspect one arbitrary sample: (code, x, stationary vector, y).
train_data[57]

('AFG', array([[ 1.  ],
        [ 0.  ],
        [ 0.  ],
        [42.86],
        [42.86],
        [22.  ],
        [ 1.  ],
        [ 0.  ],
        [22.  ]], dtype=float32), array([-0.49340332, -0.55448234, -0.6626172 , -0.6663111 , -1.1002071 ,
        -0.20613728, -1.7264656 ], dtype=float32), array([ 0.  ,  0.  ,  0.  , 42.86, 42.86], dtype=float32))

In [27]:
import torch.nn as nn
import torch.nn.functional as F

class TwoBranchModel(nn.Module):
    """Two-branch regressor combining time-series and stationary inputs.

    This class was previously also named ``Model`` and was therefore silently
    shadowed by the linear ``Model`` defined right after it in the same cell,
    which made it unreachable. It now has its own name so both architectures
    can be instantiated.

    Branches:
        * ``st``: MLP mapping 7 stationary features to an 8-dim embedding.
        * ``ts``: 1-D CNN over 9 input channels, pooled to a fixed length of 4
          with 64 channels (flattened to 256 values).
        * ``joint``: MLP mapping the concatenated 8 + 256 values to 5 outputs.
    """

    def __init__(self):
        super().__init__()

        self.st = nn.Sequential(
            nn.Linear(7, 16, bias=False),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Linear(16, 32, bias=False),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Linear(32, 8)
        )

        self.ts = nn.Sequential(
            nn.Conv1d(9, 16, kernel_size=7, stride=1, padding=3, bias=False),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Conv1d(16, 32, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128, 64, kernel_size=3, stride=1, padding=1),
            # Fixed output length so the flattened size is independent of the
            # input window length.
            nn.AdaptiveAvgPool1d(4)
        )

        self.joint = nn.Sequential(
            nn.Linear(8+64*4, 32, bias=False),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Linear(32, 5)
        )

    def forward(self, ts, st):
        """Predict 5 values per sample.

        Args:
            ts: tensor of shape (batch, 9, length) for the convolutional branch.
            st: tensor of shape (batch, 7) for the stationary branch.

        Returns:
            Tensor of shape (batch, 5).
        """
        # Flatten the pooled (batch, 64, 4) feature map to (batch, 256).
        ts = self.ts(ts).reshape(-1, 64*4)
        st = self.st(st)

        x = torch.cat((ts, st), 1)
        return self.joint(x)


class Model(nn.Module):
    """Linear baseline: a single fully connected layer from 9 inputs to 5 outputs.

    This is the class the name ``Model`` has always resolved to after running
    this cell, so existing ``Model()`` call sites are unaffected.
    """

    def __init__(self):
        super().__init__()

        self.fc = nn.Linear(9, 5)

    def forward(self, x):
        """Apply the linear layer to ``x`` (last dimension must be 9)."""
        return self.fc(x)

In [None]:
# Smoke test: push a single batch through the model.
# NOTE(review): `dl` and `model` are not defined in any cell visible here --
# presumably a DataLoader over `train_data` and a model instance already moved
# to `device`; confirm they are created before this cell on a fresh kernel.
code, x, st, y = next(iter(dl))
x, st, y = (tensor.to(device) for tensor in (x, st, y))

model(x, st)

In [3]:
from fbprophet import Prophet
import pandas as pd
import numpy as np

In [16]:
ts_grouped = df_ts.groupby('country_code')

# Build the two-column frame for the HRV series: `date` becomes `ds` and
# `confirmed` becomes `y` (the column names Prophet expects).
# The former `hrv.drop([...], 1)` step is removed: it passed `axis`
# positionally (a TypeError from pandas 2.0 on) and was redundant because only
# `ds` and `y` are kept anyway.
hrv = (
    ts_grouped.get_group('HRV')
    .rename(columns={'date': 'ds', 'confirmed': 'y'})
    .loc[:, ['ds', 'y']]
)

# Last date (inclusive) of the training period; everything after it is test.
SPLIT_DATE = pd.Timestamp('2020-04-10')

train_hrv = hrv[hrv['ds'] <= SPLIT_DATE]
test_hrv = hrv[hrv['ds'] > SPLIT_DATE]

test_hrv

Unnamed: 0,ds,y
2034,2020-04-11,39
2035,2020-04-12,66
2036,2020-04-13,50
2037,2020-04-14,54
2038,2020-04-15,37
2039,2020-04-16,50
2040,2020-04-17,23
