In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from sklearn.preprocessing import MinMaxScaler

# function for data process
def createSeqLab(trainData, windowSz):
    """Cut a series into sliding-window (sequence, label) pairs.

    Args:
        trainData: sliceable, indexable series (this file passes a
            normalized tensor).
        windowSz: number of consecutive entries in each input sequence.

    Returns:
        A list of ``(seq, label)`` tuples, where ``seq`` is ``windowSz``
        consecutive entries and ``label`` is the entry right after them.
        The list is empty when the series is not longer than the window.
    """
    starts = range(len(trainData) - windowSz)
    return [
        (trainData[begin:begin + windowSz], trainData[begin + windowSz])
        for begin in starts
    ]

# RNN (LSTM) model
class MyModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        inputSz: number of features per time step.
        hiddenSz: size of the LSTM hidden state.
        batchSz: batch size used to shape the input and the hidden state.
        outputSz: number of values produced by the linear layer.
        layerSz: number of stacked LSTM layers.
    """

    def __init__(self, inputSz, hiddenSz, batchSz, outputSz, layerSz):
        super().__init__()
        self.inputSz = inputSz
        self.hiddenSz = hiddenSz
        self.batchSz = batchSz
        self.outputSz = outputSz
        self.layerSz = layerSz
        self.lstm = nn.LSTM(inputSz, hiddenSz, layerSz)
        self.linear = nn.Linear(hiddenSz, outputSz)
        # None makes nn.LSTM fall back to zero initial states, so forward()
        # works even when reset_hidden_state() has not been called yet.
        self.hidden = None

    def reset_hidden_state(self):
        """Replace the carried (h, c) LSTM state with zeros.

        The zeros are created with the device and dtype of the model's
        parameters so the state stays usable after ``.to(...)``/``.double()``.
        """
        ref = next(self.parameters())
        shape = (self.layerSz, self.batchSz, self.hiddenSz)
        self.hidden = (
            torch.zeros(shape, device=ref.device, dtype=ref.dtype),
            torch.zeros(shape, device=ref.device, dtype=ref.dtype),
        )

    def forward(self, seq):
        """Run the sequence through the LSTM and project the last step.

        Args:
            seq: tensor whose first dimension is time; it is reshaped to
                ``(len(seq), batchSz, -1)`` before entering the LSTM.

        Returns:
            A 1-D tensor of ``outputSz`` values: the linear projection of the
            last time step for the last element of the batch.
        """
        steps = len(seq)
        lstm_out, self.hidden = self.lstm(
            seq.view(steps, self.batchSz, -1), self.hidden)
        # lstm_out is laid out (time, batch, hidden), so the last time step is
        # simply lstm_out[-1]; only that step needs to go through the linear
        # layer.
        lastStep = lstm_out[-1]
        pred = self.linear(lastStep)
        return pred[-1]

    
# Read the train/test files from Kaggle.
train = pd.read_csv('../input/yaifinal/train_covid.csv')
test = pd.read_csv('../input/yaifinal/test_covid.csv')
# Only 3 attributes of the train file are used (no missing values).
train = train[['location', 'new_cases', 'new_deaths']]
# Shape the test frame into the submission format. The explicit copy makes
# `test` an independent frame, so adding columns below does not raise
# pandas' SettingWithCopyWarning.
test = test[['id', 'location']].copy()
test['new_cases'] = 0
test['new_deaths'] = 0
counter = 0  # number of test rows that have been filled in so far

# hyper parameters
windowSz = 7  # consider (windowSz) days to predict next day
hiddenSz = 64
layerSz = 2
numEpoch = 25
alpha = 0.0005  # learning rate


# Walk both files in lockstep, one country at a time. This relies on each
# country occupying one contiguous run of rows and on both files listing the
# countries in the same order.
trainSt, testSt = 0, 0
while trainSt < len(train) and testSt < len(test):
    # split data by location
    country = train.loc[trainSt, 'location']
    if country != test.loc[testSt, 'location']:
        print("This train data sucks")
        break

    # Advance the end markers to one past the last row of this country.
    trainEd, testEd = trainSt, testSt
    while trainEd < len(train) and train.loc[trainEd, 'location'] == country:
        trainEd += 1
    while testEd < len(test) and test.loc[testEd, 'location'] == country:
        testEd += 1

    # .loc slicing is inclusive on both ends, hence trainEd - 1.
    trainData = train.loc[trainSt:trainEd-1, ['new_cases', 'new_deaths']].to_numpy()
    # Normalize each column to [-1, 1]; the scaler is fitted per country.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    trainData = scaler.fit_transform(trainData)
    # convert data set into tensor
    trainData = torch.FloatTensor(trainData)
    trainSeqLab = createSeqLab(trainData, windowSz)

    # Create a fresh LSTM model for this country and train it on the
    # sliding-window pairs, one sequence per optimizer step.
    model = MyModel(2, hiddenSz, 1, 2, layerSz)
    optimizer = optim.Adam(model.parameters(), lr=alpha)
    lossFunc = nn.MSELoss()

    for _ in range(numEpoch):
        for seq, label in trainSeqLab:
            model.reset_hidden_state()
            optimizer.zero_grad()
            pred = model(seq)
            loss = lossFunc(pred, label)
            loss.backward()
            optimizer.step()

    # Using the trained model, predict as many days as the test file asks for.
    wantPred = testEd - testSt
    trainSt, testSt = trainEd, testEd
    predList = trainData.tolist()
    model.eval()

    # Autoregressive rollout: every prediction is appended and becomes part
    # of the next input window. No gradients are needed at inference time.
    with torch.no_grad():
        for _ in range(wantPred):
            seq = torch.FloatTensor(predList[-windowSz:])
            model.reset_hidden_state()
            predList.append(model(seq).tolist())

    # Keep only the predicted rows and map them back to the original scale.
    predList = scaler.inverse_transform(np.array(predList[-wantPred:]))
    # Counts must be non-negative whole numbers: round, then clamp at zero.
    rounded = np.maximum(0, np.rint(predList)).astype(int)
    # Write the whole country block at once (.loc end label is inclusive).
    test.loc[counter:counter + wantPred - 1, ['new_cases', 'new_deaths']] = rounded
    counter += wantPred

    print("Processing:", str(counter) + '/' + str(len(test)))

# make submission format
del test['location']
# save file
test.to_csv("Submit.csv", index=False)

SyntaxError: invalid syntax (<ipython-input-1-5bf2c3849ed4>, line 118)