### TODO 
1. Test for all countries and add country-specific attributes. 
2. Ablation study comparing **DataFrame.fillna()** (filling in missing values) with **pd.notna()** filtering (dropping rows that have missing values). 
3. Ablation study with and without growth_rate. 

### Common libraries 

In [39]:
import torch.nn.functional as F
import torch 
import torch.optim as optim
import sys
import torch.nn as nn
import pandas as pd 
import os 
import numpy as np

### Reading data

In [45]:
# Configuration for this cell: which country to extract, where the CSVs live,
# and how many leading rows to discard before cleaning.
COUNTRY = 'India'
DATA_DIR = 'timeseries'
FIRST_ROW = 64

filenames = ["c1_school_closing.csv", "c2_workplace_closing.csv", "c3_cancel_public_events.csv",
            "c4_restrictions_on_gatherings.csv", "c5_close_public_transport.csv", "c6_stay_at_home_requirements.csv", "c7_movementrestrictions.csv",
            "c8_internationaltravel.csv", "confirmed_cases.csv"]


def load_country_series(filename, country=COUNTRY, data_dir=DATA_DIR):
    """Return one country's row of a time-series CSV as a Series.

    Reads ``data_dir/filename``, keeps the rows whose ``country_name`` equals
    ``country``, drops the first three columns and returns the first matching
    row. The Series index holds the remaining column labels.

    Raises:
        ValueError: if no row of the file matches ``country``.
    """
    table = pd.read_csv(os.path.join(data_dir, filename))
    country_rows = table[table['country_name'] == country].iloc[:, 3:]
    if country_rows.empty:
        raise ValueError('no rows for country %r in %s' % (country, filename))
    # Select by position instead of a hard-coded row label so the same code
    # works for any country.
    # NOTE(review): assumes one row per country; if a file ever holds several
    # rows for the same country, only the first one is used.
    return country_rows.iloc[0]


# Read every file exactly once; keys are the file names without ".csv".
series_by_name = {name[:-4]: load_country_series(name) for name in filenames}

# date extraction: the column labels of the first file become the Date column
first_series = series_by_name[filenames[0][:-4]]
npi_data = pd.DataFrame({'Date': first_series.index.values})
npi_data['Index'] = npi_data.index.values

# other attributes extraction: one column per file
for name, series in series_by_name.items():
    npi_data[name] = series.values

# compute growth rate: day-over-day change expressed as a percentage of the
# same day's confirmed_cases value
daily_change = npi_data['confirmed_cases'].diff()
npi_data['growth_rate'] = 100 * daily_change / npi_data['confirmed_cases']  # (0,1) or (0,100)

# optional smoothing of growth_rate (disabled)
# npi_data['growth_rate'] = npi_data['growth_rate'].rolling(5,win_type='gaussian').sum(std=3)

# cleaning df: discard the first FIRST_ROW rows, then every row that still has
# a missing value in any column (the original index labels are kept)
npi_data = npi_data[FIRST_ROW:].dropna()

### Baseline linear model 

In [41]:
class LinearModel(torch.nn.Module):
    """Fully connected baseline: in_ -> 64 -> 8 -> out_ with ReLU in between.

    Args:
        in_: number of input features after flattening.
        out_: number of output values (defaults to a single value).
    """

    def __init__(self, in_=300, out_=1):
        super().__init__()
        self.linear1 = torch.nn.Linear(in_, 64)
        self.linear2 = torch.nn.Linear(64, 8)
        # Honour the requested output size instead of always emitting one value.
        self.linear3 = torch.nn.Linear(8, out_)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` to 1-D and return a 1-D tensor of ``out_`` values.

        The whole input is flattened, so ``x`` must contain exactly ``in_``
        elements in total; batched input is not supported.
        """
        x = x.reshape(-1)
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        return self.linear3(x)

### Training baseline model without positional info

1. Add country-specific information here when building the dataset for the model. 

In [46]:
# --- hyper-parameters ---------------------------------------------------
history = 30           # number of consecutive rows fed to the model per prediction
split_index = 200      # rows [0, split_index) are used for training
SEED = 0               # fixed seed so weight initialisation is reproducible
NUM_EPOCHS = 200
EVAL_EVERY = 10        # report validation loss every EVAL_EVERY epochs
LEARNING_RATE = 1e-4

torch.manual_seed(SEED)

# --- build tensors --------------------------------------------------------
y_total = npi_data['growth_rate'].values
# NOTE(review): this mutates npi_data in place, so re-running the cell without
# re-running the data-loading cell raises a KeyError. Kept because cells that
# are not visible here may rely on the reduced frame.
npi_data.drop(columns=['Date', 'confirmed_cases','growth_rate','Index'],inplace=True)
# np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
x_total = npi_data.to_numpy(dtype=np.float64)

x_total = torch.from_numpy(x_total).to(dtype=torch.double)
y_total = torch.from_numpy(y_total).to(dtype=torch.double)

model = LinearModel(x_total.shape[1]*history).to(dtype=torch.double)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
mse_loss = torch.nn.MSELoss()

# --- train / validation split -------------------------------------------
# The validation slice starts `history` rows early so that its first window
# can be built from the last training rows.
x_train = torch.clone(x_total[0:split_index,:])
y_train = torch.clone(y_total[0:split_index])
x_test = torch.clone(x_total[split_index-history:,:])
y_test = torch.clone(y_total[split_index-history:])

print('attributes | ', x_train.shape[1])
print('training | ', len(x_train))
print('test | ', len(x_test)-history)
print('-'*15)


def windowed_loss(net, features, targets, window):
    """Sum the MSE of ``net`` over all sliding windows, without gradients.

    For every ``i`` in ``[window, len(features) - 1)`` the model sees rows
    ``i - window .. i - 1`` of ``features`` and is scored against
    ``targets[i + 1]``. The caller is responsible for putting ``net`` in
    eval mode. Returns the summed loss as a Python float.
    """
    total = 0.0
    with torch.no_grad():
        for i in range(window, len(features) - 1):
            pred = net(features[i - window:i, :])
            # Reshape the scalar target to the prediction's shape so MSELoss
            # does not have to broadcast (and does not warn).
            total += mse_loss(pred, targets[i + 1].reshape(pred.shape)).item()
    return total


# --- loss of the untrained model ------------------------------------------
model.eval()
total_loss = windowed_loss(model, x_test, y_test, history)
print('loss before training %0.2f' %(total_loss))

# --- training ---------------------------------------------------------------
model.train()
for epoch in range(NUM_EPOCHS):
    training_loss = 0.0
    for i in range(history,len(x_train)-1):
        optimizer.zero_grad() # make gradients zero 
        x_temp = x_train[i-history:i,:]  # [history, #attributes], flattened by the model
        y_pred = model(x_temp)
        # NOTE(review): the window ends at row i-1 but the target is row i+1,
        # so row i is skipped - confirm that this two-step offset is intended.
        loss = mse_loss(y_pred, y_train[i+1].reshape(y_pred.shape))
        loss.backward() # computing gradients 
        optimizer.step() # updating weights 
        training_loss += loss.item() 
    if((epoch+1)%EVAL_EVERY == 0):
        model.eval()
        validation_loss = windowed_loss(model, x_test, y_test, history)
        print('epoch %d | training loss %0.2f | validation loss %0.2f'%(epoch, training_loss, validation_loss))
        model.train()        

attributes |  9
training |  200
test |  71
---------------
loss before training 48.37
epoch 9 | training loss 367.78 | validation loss 9.33
epoch 19 | training loss 283.78 | validation loss 5.06
epoch 29 | training loss 211.07 | validation loss 3.96
epoch 39 | training loss 144.08 | validation loss 2.89
epoch 49 | training loss 100.24 | validation loss 2.67
epoch 59 | training loss 73.42 | validation loss 2.81
epoch 69 | training loss 56.47 | validation loss 3.08
epoch 79 | training loss 46.60 | validation loss 3.27
epoch 89 | training loss 37.74 | validation loss 3.21
epoch 99 | training loss 31.56 | validation loss 3.31
epoch 109 | training loss 34.36 | validation loss 3.70
epoch 119 | training loss 29.89 | validation loss 5.32
epoch 129 | training loss 28.55 | validation loss 7.07


KeyboardInterrupt: 