In [1]:
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.nn import *
from datetime import datetime

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Fifteen training rows with three features each, listed in sequence order.
X_train = [
    [1, 2, 3],
    [0, 0, 0],
    [1, 1, 1],
    [9, 6, 3],
    [3, 4, 5],
    [2, 2, 1],
    [0, 0, 0],
    [1, 1, 1],
    [9, 6, 3],
    [3, 4, 5],
    [1, 2, 3],
    [0, 0, 0],
    [1, 1, 1],
    [9, 6, 3],
    [3, 4, 5],
]
# One target per training row: 1..10 followed by 1..5.
y_train = list(range(1, 11)) + list(range(1, 6))

# Two held-out rows of three consecutive integers, starting at 10 and at 11;
# the target of a row that starts at `start` is `start + 3`.
X_test = [[start, start + 1, start + 2] for start in (10, 11)]
y_test = [start + 3 for start in (10, 11)]

In [3]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset pairing feature rows with scalar targets.

    Both inputs are converted to ``float32`` tensors once, at construction.
    Targets are stored as a column, so the target of every sample has
    shape ``(1,)``.

    Args:
        X: Sequence of feature rows (for example a list of equal-length lists).
        y: Sequence of targets, one per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        # torch.as_tensor with an explicit dtype replaces the legacy
        # torch.Tensor(...) constructor.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.float32).unsqueeze(1)
        # Fail at construction instead of with an IndexError in the middle of
        # an epoch when the two sequences disagree in length.
        if len(self.X) != len(self.y):
            raise ValueError(
                "X and y must contain the same number of samples, got "
                f"{len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``[features, target]`` for the sample at position ``idx``."""
        return [self.X[idx], self.y[idx]]

In [43]:
# Mini-batch size shared by both loaders.
batchSize = 1

# Training split: wrap the raw lists and serve them in their stored order.
dataset_train = Dataset(X_train, y_train)
train_dl = torch.utils.data.DataLoader(
    dataset_train,
    shuffle=False,
    batch_size=batchSize,
)

# Held-out split, also served in its stored order.
dataset_test = Dataset(X_test, y_test)
test_dl = torch.utils.data.DataLoader(
    dataset_test,
    shuffle=False,
    batch_size=batchSize,
)

In [44]:
class ElmanRNN(torch.nn.Module):
    """Single-step Elman recurrent cell with a scalar linear read-out.

    One call to :meth:`forward` computes
    ``h_new = LeakyReLU(W_x(X) + W_h(h))`` and returns ``W_o(h_new)`` together
    with ``h_new``. The caller is responsible for feeding ``h_new`` back in on
    the next step.

    Args:
        n_inputs: Number of features in each input row.
        hidden_size: Width of the hidden state. Defaults to 10.
    """

    def __init__(self, n_inputs, hidden_size=10):
        super().__init__()
        self.hidden_size = hidden_size
        # Layers are referenced through torch.nn so the class does not rely on
        # a wildcard import being present.
        self.W_x = torch.nn.Linear(n_inputs, self.hidden_size)
        self.W_h = torch.nn.Linear(self.hidden_size, self.hidden_size)
        self.W_o = torch.nn.Linear(self.hidden_size, 1)
        self.activation = torch.nn.LeakyReLU()

    def forward(self, X, h):
        """Advance the cell by one step.

        Args:
            X: Input tensor whose last dimension is ``n_inputs``.
            h: Previous hidden state whose last dimension is ``hidden_size``.

        Returns:
            Tuple ``(output, new_hidden)``; ``output`` has a last dimension of 1.
        """
        X = self.W_x(X)
        h = self.W_h(h)
        h = self.activation(X + h)
        X = self.W_o(h)
        return X, h

    def init_hidden(self, batch_size=1):
        """Return an all-zero hidden state of shape ``(batch_size, hidden_size)``.

        The tensor is created on the same device as the module's weights, so a
        later ``.to(device)`` by the caller is a no-op rather than a necessity.
        """
        return torch.zeros(batch_size, self.hidden_size, device=self.W_x.weight.device)

In [45]:
# Size the input layer from the width of the first training row, then move
# the network to the selected device.
model_0 = ElmanRNN(n_inputs=len(X_train[0]))
model_0 = model_0.to(device)

In [50]:
# Mean-squared-error objective for the scalar regression target.
criterion = MSELoss()

# Adam over every parameter of model_0, with a small L2 penalty.
optimizer = torch.optim.Adam(
    params=model_0.parameters(),
    lr=1e-4,
    weight_decay=1e-7,
)

# Halves the learning rate (never below 1e-6) once the monitored value has
# not improved for more than 3 consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)

In [47]:
# Start the recurrence from an all-zero hidden state on the training device.
h = model_0.init_hidden()
h = h.to(device)

In [51]:
# Train with one-step truncated back-propagation through time: the hidden
# state is carried from sample to sample (and across epochs), but it is
# detached after every optimizer step so gradients never reach earlier steps.
model_0.train()
for epoch in range(301):
    for inputs, targets in train_dl:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        yhat, h = model_0(inputs, h)
        loss = criterion(yhat, targets)
        # No retain_graph: h is detached at the end of every step, so each
        # graph spans a single step and can be freed right after backward.
        # NOTE(review): assumes h enters this cell without autograd history
        # (fresh from init_hidden or from a previous run of this loop).
        loss.backward()
        optimizer.step()
        h = h.detach()

# Replay the training sequence without gradients, continuing from the hidden
# state left by training, and print each input/target pair with its prediction.
with torch.no_grad():
    for inputs, targets in train_dl:
        inputs, targets = inputs.to(device), targets.to(device)
        yhat, h = model_0(inputs, h)
        print(inputs, targets)
        print(yhat)

tensor([[1., 2., 3.]], device='cuda:0') tensor([[1.]], device='cuda:0')
tensor([[1.0834]], device='cuda:0')
tensor([[0., 0., 0.]], device='cuda:0') tensor([[2.]], device='cuda:0')
tensor([[1.7234]], device='cuda:0')
tensor([[1., 1., 1.]], device='cuda:0') tensor([[3.]], device='cuda:0')
tensor([[2.9830]], device='cuda:0')
tensor([[9., 6., 3.]], device='cuda:0') tensor([[4.]], device='cuda:0')
tensor([[3.7663]], device='cuda:0')
tensor([[3., 4., 5.]], device='cuda:0') tensor([[5.]], device='cuda:0')
tensor([[4.6752]], device='cuda:0')
tensor([[2., 2., 1.]], device='cuda:0') tensor([[6.]], device='cuda:0')
tensor([[5.8460]], device='cuda:0')
tensor([[0., 0., 0.]], device='cuda:0') tensor([[7.]], device='cuda:0')
tensor([[7.6912]], device='cuda:0')
tensor([[1., 1., 1.]], device='cuda:0') tensor([[8.]], device='cuda:0')
tensor([[6.8943]], device='cuda:0')
tensor([[9., 6., 3.]], device='cuda:0') tensor([[9.]], device='cuda:0')
tensor([[9.0445]], device='cuda:0')
tensor([[3., 4., 5.]], devic

In [None]:
# One manual optimisation step on a single hand-written sample, useful for
# inspecting the hidden state and the prediction right after an update.
x = torch.Tensor([[1,2,3]]).to(device)
y = torch.Tensor([[4]]).to(device)
for j in range(1):
    optimizer.zero_grad()
    # The network defined in this notebook is model_0; a bare `model` is not
    # defined and raises NameError on a fresh kernel.
    yhat, h = model_0(x, h)
    # (prediction, target) order, matching the other training loops.
    criterion(yhat, y).backward()
    optimizer.step()
    # Cut the autograd history so the next step does not backpropagate
    # through this one.
    h = h.detach()

h, yhat

In [None]:
# Train model_0 for 101 epochs, evaluating on the held-out loader after each
# epoch, and report the mean losses every 100 epochs plus the elapsed time.
start_time = datetime.now()

for epoch in range(101):
    train_loss = 0.0
    model_0.train()
    for inputs, targets in train_dl:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        yhat, h = model_0(inputs, h)
        loss = criterion(yhat, targets)
        loss.backward()
        optimizer.step()
        # Detach so the next step's backward pass does not walk into this
        # step's graph, whose weights optimizer.step() has just modified in
        # place; this also makes retain_graph unnecessary.
        h = h.detach()
        # .item() accumulates a plain float instead of keeping every
        # per-step graph alive through a running tensor sum.
        train_loss += loss.item()

    model_0.eval()

    with torch.no_grad():
        eval_loss = 0.0
        # Evaluate from the current training state on a separate name, so the
        # held-out pass cannot overwrite the hidden state used for training.
        h_eval = h
        for inputs, targets in test_dl:
            inputs, targets = inputs.to(device), targets.to(device)
            yhat, h_eval = model_0(inputs, h_eval)
            eval_loss += criterion(yhat, targets).item()

    # NOTE(review): learning-rate scheduling is left disabled, as before.
    #scheduler.step(eval_loss)

    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, train loss: {train_loss / len(train_dl)}, eval loss: {eval_loss / len(test_dl)}")

print(datetime.now() - start_time)

In [None]:
# Print every held-out sample next to the model's prediction for it.
with torch.no_grad():
    # Continue from the current hidden state on a separate name so the
    # training state `h` is left untouched.
    h_test = h
    for inputs, targets in test_dl:
        print(inputs, targets)
        inputs, targets = inputs.to(device), targets.to(device)
        # forward() needs the hidden state and returns (prediction, hidden).
        yhat, h_test = model_0(inputs, h_test)
        print(yhat)