In [54]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import StepLR
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

In [55]:
# find scale parameters in all the data
country = 'Netherlands'

country_dict = {'Germany':['2002-05-01','2016-12-31'], 'Netherlands':['2000-01-01', '2015-09-10']}

inputs = pd.read_csv('../../../../data/'+country+'/input_data.csv').dropna()
inputs.rename(columns={ inputs.columns[0]: "time" }, inplace = True)
inputs = inputs.set_index('time')[country_dict[country][0]:country_dict[country][1]]

heads = pd.read_csv('../../../../data/'+country+'/heads.csv').dropna()
heads.rename(columns={ heads.columns[0]: "Date" }, inplace = True)
heads = heads.set_index('Date')[country_dict[country][0]:country_dict[country][1]]

dates = inputs.index
inputs = inputs.values
heads = heads.values

# add heads to input data
# inputs = np.append(inputs, heads, axis=1)

# make heads as diff
# heads = np.insert(np.diff(heads, axis=0), 0, 0, axis=0)

T = 90
D = inputs.shape[1]
N = len(inputs) - T
test_ratio = 0.2
train_ratio = 1 - test_ratio
Ntrain = int(N * train_ratio)

x_scaler = MinMaxScaler()
# x_scaler.fit(inputs[:Ntrain])
inputs = x_scaler.fit_transform(inputs)

y_scaler = MinMaxScaler()
# y_scaler.fit(heads[:Ntrain])
heads = y_scaler.fit_transform(heads)


# Building sample data
X = np.zeros((N, T, D))
y = np.zeros(N)

for t in range(N):
    X[t, : ,:] = inputs[t:t+T]
    y[t] = heads[t+T, 0]

y = np.expand_dims(y, axis=1)
val_ratio = test_ratio / (1 - test_ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, shuffle=False)

dataX = Variable(torch.Tensor(X).cuda())
dataY = Variable(torch.Tensor(y).cuda())

trainX = Variable(torch.Tensor(X_train).cuda())
trainY = Variable(torch.Tensor(y_train).cuda())

testX = Variable(torch.Tensor(X_test).cuda())
testY = Variable(torch.Tensor(y_test).cuda())

In [56]:
class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).cuda())
        
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).cuda())
        
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        h_out = h_out.view(-1, self.hidden_size)
        
        out = self.fc(h_out)
        
        return out

In [57]:
# export Germany

# find scale parameters in all the data

country_dict = {'Germany':['2002-05-01','2021-12-31'], 'Netherlands':['2000-01-01', '2020-11-27']}

inputs = pd.read_csv('../../../../data/'+country+'/input_data.csv').dropna()
inputs.rename(columns={ inputs.columns[0]: "time" }, inplace = True)
inputs = inputs.set_index('time')[country_dict[country][0]:country_dict[country][1]]

heads = pd.read_csv('../../../../data/'+country+'/heads.csv').dropna()
heads.rename(columns={ heads.columns[0]: "Date" }, inplace = True)
heads = heads.set_index('Date')[country_dict[country][0]:country_dict[country][1]]

dates = inputs.index
inputs = inputs.values
heads = heads.values

# add heads to input data
# inputs = np.append(inputs, heads, axis=1)

# make heads as diff
# heads = np.insert(np.diff(heads, axis=0), 0, 0, axis=0)

T = 90
D = inputs.shape[1]
N = len(inputs) - T
test_ratio = 0.2
train_ratio = 1 - test_ratio
Ntrain = int(N * train_ratio)

x_scaler = MinMaxScaler()
# x_scaler.fit(inputs[:Ntrain])
inputs = x_scaler.fit_transform(inputs)

y_scaler = MinMaxScaler()
# y_scaler.fit(heads[:Ntrain])
heads = y_scaler.fit_transform(heads)


# Building sample data
X = np.zeros((N, T, D))
y = np.zeros(N)

for t in range(N):
    X[t, : ,:] = inputs[t:t+T]

dataX = Variable(torch.Tensor(X).cuda())
dataY = Variable(torch.Tensor(y).cuda())

df = pd.DataFrame(columns=['Date'])
df['Date'] = dates

for i in range(1000):
    lstm = torch.load(country+'_'+str(i)+'.pth')
    lstm.eval()

    predict = y_scaler.inverse_transform(lstm(dataX).cpu().data.numpy())[:,0]
    predict = np.concatenate((np.array([predict[0]]*90), predict), axis=0)

    df[str(i)] = predict

reliability_df = df.copy().set_index('Date').T
submission_df = pd.read_csv('../submission_form_'+country+'.csv')
submission_df['95% Lower Bound'] = reliability_df.quantile(.05, axis = 0).to_list()
submission_df['95% Upper Bound'] = reliability_df.quantile(.95, axis = 0).to_list()
submission_df.to_csv('submission_form_'+country+'.csv', index=False) 

  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)] = predict
  df[str(i)