In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, TensorDataset, DataLoader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Select the compute device: prefer the GPU when CUDA is available, otherwise use the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


## Generate data for our future Network

In [None]:
# Draw 1000 random digits in 0..9 for x and allocate a same-shaped integer tensor for y.
# Target rule: y_1 = x_1, then y_i = x_i + x_1, and if y_i >= 10 then y_i = y_i - 10.
torch.manual_seed(69)
# `high` is exclusive in torch.randint, so high=10 is required for the digit 9 to appear.
# The integer dtype is requested directly instead of sampling floats and casting.
x = torch.randint(high=10, size=(1000,), device=device, dtype=torch.long)
y = torch.zeros_like(x)

In [None]:
# Generate the corresponding y in one vectorized step instead of a per-element Python loop.
# Every digit is below 10, so x + x[0] is below 20 and a single "- 10" wrap-around
# is exactly "% 10".
# NOTE(review): the offset is x[0] of the whole flat tensor, i.e. one constant shift shared
# by all elements — confirm this is intended rather than a per-sequence first element.
y = (x + x[0]) % 10
y[0] = x[0]  # the very first element is copied unchanged

In [None]:
# Reshape both flat tensors into rows, one sequence of 10 digits per row
x_batch, y_batch = (flat.view(-1, 10) for flat in (x, y))

In [None]:
# Pair every input sequence with its target sequence
len_examples = len(y_batch)
dataset = [[x_batch[idx], y_batch[idx]] for idx in range(len_examples)]



In [None]:
# Train/test split: the first 80% of the pairs are used for training, the rest for testing
split_idx = int(len(y_batch)*0.8)
train_tensor, test_tensor = dataset[:split_idx], dataset[split_idx:]
test_tensor[:2]

[[tensor([3, 5, 8, 8, 5, 5, 5, 4, 0, 8]),
  tensor([0, 2, 5, 5, 2, 2, 2, 1, 7, 5])],
 [tensor([1, 4, 2, 8, 4, 5, 1, 7, 1, 3]),
  tensor([8, 1, 9, 5, 1, 2, 8, 4, 8, 0])]]

In [None]:
# Model hyperparameters: embedding width, recurrent hidden width, number of digit classes
embedding_size, hidden_size, num_classes = 10, 10, 10

LSTM Model

In [None]:
class LSTM(torch.nn.Module):
    """Per-step classifier: embedding -> single-layer LSTM -> linear layer over the classes.

    The recurrent state lives in ``self.hidden`` as an ``(h, c)`` pair of tensors of shape
    ``(1, 1, hidden_dim)``. Callers reset it by assigning a fresh pair before a forward pass.
    """

    def __init__(self, num_classes = num_classes, embedding_dim = embedding_size, hidden_dim = hidden_size):
        """Build the layers and the initial zero state.

        Args:
            num_classes: size of the embedding vocabulary and of the output layer.
            embedding_dim: width of the embedding vectors fed to the LSTM.
            hidden_dim: width of the LSTM hidden and cell states.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = torch.nn.Embedding(num_classes, embedding_dim)
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)
        self.linear = torch.nn.Linear(hidden_dim, num_classes)
        # Initialize h0 and c0. They are plain attributes (not parameters or buffers), so
        # Module.to() does not move them; forward() aligns them with the input instead.
        self.hidden = (torch.zeros(1, 1, hidden_dim),
                       torch.zeros(1, 1, hidden_dim))

    def forward(self, X):
        """Score every position of one sequence.

        Args:
            X: 1-D integer tensor of class indices (a single sequence).

        Returns:
            Tensor of shape ``(len(X), num_classes, 1)`` holding unnormalized class scores.
        """
        model_in = self.embedding(X)
        # Add a batch axis of size 1: (seq_len, embedding_dim) -> (seq_len, 1, embedding_dim)
        model_in = model_in.unsqueeze(1)
        # Move the stored state to the input's device so a state created on the CPU
        # still works when the model and data live on the GPU
        state = tuple(s.to(model_in.device) for s in self.hidden)
        model_out, new_state = self.lstm(model_in, state)
        # Keep only the values of the final state; detaching prevents a later backward()
        # from walking into the already-released graph of this call
        self.hidden = tuple(s.detach() for s in new_state)
        pred = self.linear(model_out)
        # (seq_len, 1, num_classes) -> (seq_len, num_classes, 1)
        pred = pred.transpose(1, 2)
        return pred
    

In [None]:
# Move the model to the target device BEFORE building the optimizer, so the optimizer is
# created over the parameters that are actually trained (as advised by the torch.optim docs)
model = LSTM().to(device)
softmax = torch.nn.functional.softmax
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model

LSTM(
  (embedding): Embedding(10, 10)
  (lstm): LSTM(10, 10)
  (linear): Linear(in_features=10, out_features=10, bias=True)
)

In [None]:
# Train the LSTM model one sequence at a time for a fixed number of epochs
num_epochs = 20
accuracies, max_accuracy = [], 0
for epoch in range(num_epochs):
    model.train()
    print('Epoch: {}'.format(epoch))
    # Dedicated loop names keep the module-level x / y data tensors from being overwritten
    for x_seq, y_seq in train_tensor:
        optimizer.zero_grad()
        # Fresh zero (h0, c0) for every sequence, created on the same device as the data
        model.hidden = (torch.zeros(1, 1, model.hidden_dim, device=x_seq.device),
                        torch.zeros(1, 1, model.hidden_dim, device=x_seq.device))
        prediction = model(x_seq)
        # The model output is (seq_len, num_classes, 1), so the targets must be (seq_len, 1)
        loss = criterion(prediction, y_seq.unsqueeze(1))
        # Backpropagate
        loss.backward()
        # Update weights
        optimizer.step()
    # Loss of the last training sequence of the epoch (not an epoch average)
    print('Loss: {:6.4f}'.format(loss.item()))
      

Epoch: 0
Loss: 0.2790
Epoch: 1
Loss: 0.0554
Epoch: 2
Loss: 0.0266
Epoch: 3
Loss: 0.0163
Epoch: 4
Loss: 0.0114
Epoch: 5
Loss: 0.0086
Epoch: 6
Loss: 0.0068
Epoch: 7
Loss: 0.0056
Epoch: 8
Loss: 0.0047
Epoch: 9
Loss: 0.0040
Epoch: 10
Loss: 0.0034
Epoch: 11
Loss: 0.0030
Epoch: 12
Loss: 0.0026
Epoch: 13
Loss: 0.0023
Epoch: 14
Loss: 0.0020
Epoch: 15
Loss: 0.0018
Epoch: 16
Loss: 0.0017
Epoch: 17
Loss: 0.0015
Epoch: 18
Loss: 0.0014
Epoch: 19
Loss: 0.0013


In [None]:
# Evaluate the LSTM model on the held-out sequences and report per-digit accuracy
with torch.no_grad():
    # Switching to evaluation mode is needed once, not once per sequence
    model.eval()
    matches, total = 0, 0
    # Dedicated loop names keep the module-level x / y data tensors from being overwritten
    for x_seq, y_seq in test_tensor:
        # Fresh zero (h0, c0) for every sequence, created on the same device as the data
        model.hidden = (torch.zeros(1, 1, model.hidden_dim, device=x_seq.device),
                        torch.zeros(1, 1, model.hidden_dim, device=x_seq.device))
        scores = model(x_seq)
        # Softmax is monotonic, so the most probable class is simply the arg-max of the scores
        batch_out = scores.argmax(dim=1)
        # Remove fake dimension
        batch_out = batch_out.squeeze(1)
        # Calculate accuracy
        matches += torch.eq(batch_out, y_seq).sum().item()
        print(batch_out, y_seq)
        total += torch.numel(batch_out)
    accuracy = matches / total
    print('Accuracy: {:4.2f}%'.format(accuracy * 100))

tensor([3, 2, 4, 4, 7, 5, 9, 0, 8, 3]) tensor([3, 2, 4, 4, 7, 5, 9, 0, 8, 3])
tensor([8, 8, 6, 6, 2, 0, 2, 3, 4, 2]) tensor([8, 8, 6, 6, 2, 0, 2, 3, 4, 2])
tensor([4, 9, 6, 8, 4, 9, 2, 7, 9, 8]) tensor([4, 9, 6, 8, 4, 9, 2, 7, 9, 8])
tensor([8, 6, 0, 7, 6, 0, 3, 4, 0, 9]) tensor([8, 6, 0, 7, 6, 0, 3, 4, 0, 9])
tensor([5, 7, 4, 7, 7, 0, 4, 0, 7, 5]) tensor([5, 7, 4, 7, 7, 0, 4, 0, 7, 5])
tensor([5, 4, 2, 2, 0, 8, 7, 3, 6, 6]) tensor([5, 4, 2, 2, 0, 8, 7, 3, 6, 6])
tensor([7, 6, 7, 5, 5, 0, 5, 0, 5, 9]) tensor([7, 6, 7, 5, 5, 0, 5, 0, 5, 9])
tensor([3, 5, 9, 0, 5, 9, 0, 3, 6, 8]) tensor([3, 5, 9, 0, 5, 9, 0, 3, 6, 8])
tensor([8, 3, 9, 6, 9, 8, 0, 8, 3, 0]) tensor([8, 3, 9, 6, 9, 8, 0, 8, 3, 0])
tensor([3, 4, 4, 4, 8, 9, 4, 2, 4, 6]) tensor([3, 4, 4, 4, 8, 9, 4, 2, 4, 6])
tensor([4, 9, 5, 9, 4, 5, 4, 4, 7, 4]) tensor([4, 9, 5, 9, 4, 5, 4, 4, 7, 4])
tensor([7, 5, 9, 3, 3, 5, 3, 5, 0, 8]) tensor([7, 5, 9, 3, 3, 5, 3, 5, 0, 8])
tensor([5, 2, 4, 0, 6, 8, 8, 2, 5, 7]) tensor([5, 2, 4, 0, 6, 8,

Simple RNN model

In [None]:
class RNN(torch.nn.Module):
    """Per-step classifier: embedding -> single-layer vanilla RNN -> linear layer over the classes.

    No recurrent state is stored on the module; every forward pass lets ``torch.nn.RNN``
    start from its default initial state.
    """

    def __init__(self, num_classes = num_classes, embedding_dim = embedding_size, hidden_dim = hidden_size):
        """Build the layers.

        Args:
            num_classes: size of the embedding vocabulary and of the output layer.
            embedding_dim: width of the embedding vectors fed to the RNN.
            hidden_dim: width of the RNN hidden state.
        """
        super(RNN, self).__init__()
        self.embedding = torch.nn.Embedding(num_classes, embedding_dim)
        self.rnn = torch.nn.RNN(embedding_dim, hidden_dim)
        self.linear = torch.nn.Linear(hidden_dim, num_classes)

    def forward(self, X):
        """Score every position of one sequence.

        Args:
            X: 1-D integer tensor of class indices (a single sequence).

        Returns:
            Tensor of shape ``(len(X), num_classes, 1)`` holding unnormalized class scores.
        """
        model_in = self.embedding(X)
        # Add a batch axis of size 1: (seq_len, embedding_dim) -> (seq_len, 1, embedding_dim)
        model_in = model_in.unsqueeze(1)
        out, _ = self.rnn(model_in)
        out = self.linear(out)
        # (seq_len, 1, num_classes) -> (seq_len, num_classes, 1). A plain transpose does not
        # depend on the module-level num_classes and, unlike a dimensionless squeeze(),
        # stays correct for sequences of length 1.
        out = out.transpose(1, 2)
        return out
    

In [None]:
# Move the model to the target device BEFORE building the optimizer, so the optimizer is
# created over the parameters that are actually trained (as advised by the torch.optim docs)
model_RNN = RNN().to(device)
softmax = torch.nn.functional.softmax
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_RNN.parameters(), lr=0.01)
model_RNN

RNN(
  (embedding): Embedding(10, 10)
  (rnn): RNN(10, 10)
  (linear): Linear(in_features=10, out_features=10, bias=True)
)

In [None]:
# Train the simple RNN model one sequence at a time for a fixed number of epochs
num_epochs = 20
accuracies, max_accuracy = [], 0
for epoch in range(num_epochs):
    model_RNN.train()
    print('Epoch: {}'.format(epoch))
    # Dedicated loop names keep the module-level x / y data tensors from being overwritten
    for x_seq, y_seq in train_tensor:
        optimizer.zero_grad()
        prediction = model_RNN(x_seq)
        # The model output is (seq_len, num_classes, 1), so the targets must be (seq_len, 1)
        y_in = y_seq.unsqueeze(1)
        # Calculate loss
        loss = criterion(prediction, y_in)
        # Backpropagate
        loss.backward()
        # Update weights
        optimizer.step()

    # Loss of the last training sequence of the epoch (not an epoch average)
    print('Loss: {:6.4f}'.format(loss.item()))
      

Epoch: 0
Loss: 0.2277
Epoch: 1
Loss: 0.0461
Epoch: 2
Loss: 0.0237
Epoch: 3
Loss: 0.0153
Epoch: 4
Loss: 0.0111
Epoch: 5
Loss: 0.0087
Epoch: 6
Loss: 0.0071
Epoch: 7
Loss: 0.0060
Epoch: 8
Loss: 0.0052
Epoch: 9
Loss: 0.0046
Epoch: 10
Loss: 0.0042
Epoch: 11
Loss: 0.0038
Epoch: 12
Loss: 0.0035
Epoch: 13
Loss: 0.0032
Epoch: 14
Loss: 0.0030
Epoch: 15
Loss: 0.0028
Epoch: 16
Loss: 0.0027
Epoch: 17
Loss: 0.0025
Epoch: 18
Loss: 0.0024
Epoch: 19
Loss: 0.0023


In [None]:
# Evaluate the simple RNN model on the held-out sequences and report per-digit accuracy
with torch.no_grad():
    # Switching to evaluation mode is needed once, not once per sequence
    model_RNN.eval()
    matches, total = 0, 0
    # Dedicated loop names keep the module-level x / y data tensors from being overwritten
    for x_seq, y_seq in test_tensor:
        scores = model_RNN(x_seq)
        # Softmax is monotonic, so the most probable class is simply the arg-max of the scores
        batch_out = scores.argmax(dim=1)
        # Remove fake dimension
        batch_out = batch_out.squeeze(1)
        # Calculate accuracy
        matches += torch.eq(batch_out, y_seq).sum().item()
        print(batch_out, y_seq)
        total += torch.numel(batch_out)
    accuracy = matches / total
    print('Accuracy: {:4.2f}%'.format(accuracy * 100))

tensor([3, 2, 4, 4, 7, 5, 9, 0, 8, 3]) tensor([3, 2, 4, 4, 7, 5, 9, 0, 8, 3])
tensor([8, 8, 6, 6, 2, 0, 2, 3, 4, 2]) tensor([8, 8, 6, 6, 2, 0, 2, 3, 4, 2])
tensor([4, 9, 6, 8, 4, 9, 2, 7, 9, 8]) tensor([4, 9, 6, 8, 4, 9, 2, 7, 9, 8])
tensor([8, 6, 0, 7, 6, 0, 3, 4, 0, 9]) tensor([8, 6, 0, 7, 6, 0, 3, 4, 0, 9])
tensor([5, 7, 4, 7, 7, 0, 4, 0, 7, 5]) tensor([5, 7, 4, 7, 7, 0, 4, 0, 7, 5])
tensor([5, 4, 2, 2, 0, 8, 7, 3, 6, 6]) tensor([5, 4, 2, 2, 0, 8, 7, 3, 6, 6])
tensor([7, 6, 7, 5, 5, 0, 5, 0, 5, 9]) tensor([7, 6, 7, 5, 5, 0, 5, 0, 5, 9])
tensor([3, 5, 9, 0, 5, 9, 0, 3, 6, 8]) tensor([3, 5, 9, 0, 5, 9, 0, 3, 6, 8])
tensor([8, 3, 9, 6, 9, 8, 0, 8, 3, 0]) tensor([8, 3, 9, 6, 9, 8, 0, 8, 3, 0])
tensor([3, 4, 4, 4, 8, 9, 4, 2, 4, 6]) tensor([3, 4, 4, 4, 8, 9, 4, 2, 4, 6])
tensor([4, 9, 5, 9, 4, 5, 4, 4, 7, 4]) tensor([4, 9, 5, 9, 4, 5, 4, 4, 7, 4])
tensor([7, 5, 9, 3, 3, 5, 3, 5, 0, 8]) tensor([7, 5, 9, 3, 3, 5, 3, 5, 0, 8])
tensor([5, 2, 4, 0, 6, 8, 8, 2, 5, 7]) tensor([5, 2, 4, 0, 6, 8,

GRU Model

In [None]:
class GRU(torch.nn.Module):
    """Per-step classifier: embedding -> single-layer GRU -> linear layer over the classes.

    The recurrent state lives in ``self.hidden`` as a tensor of shape ``(1, 1, hidden_dim)``.
    Callers reset it by assigning a fresh tensor before a forward pass.
    """

    def __init__(self, num_classes = num_classes, embedding_dim = embedding_size, hidden_dim = hidden_size):
        """Build the layers and the initial zero state.

        Args:
            num_classes: size of the embedding vocabulary and of the output layer.
            embedding_dim: width of the embedding vectors fed to the GRU.
            hidden_dim: width of the GRU hidden state.
        """
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = torch.nn.Embedding(num_classes, embedding_dim)
        self.gru = torch.nn.GRU(embedding_dim, hidden_dim)
        self.linear = torch.nn.Linear(hidden_dim, num_classes)
        # Initialize h0. It is a plain attribute (not a parameter or buffer), so
        # Module.to() does not move it; forward() aligns it with the input instead.
        self.hidden = torch.zeros(1, 1, hidden_dim)

    def forward(self, X):
        """Score every position of one sequence.

        Args:
            X: 1-D integer tensor of class indices (a single sequence).

        Returns:
            Tensor of shape ``(len(X), num_classes, 1)`` holding unnormalized class scores.
        """
        model_in = self.embedding(X)
        # Add a batch axis of size 1: (seq_len, embedding_dim) -> (seq_len, 1, embedding_dim)
        model_in = model_in.unsqueeze(1)
        # Move the stored state to the input's device so a state created on the CPU
        # still works when the model and data live on the GPU
        state = self.hidden.to(model_in.device)
        model_out, new_state = self.gru(model_in, state)
        # Keep only the values of the final state; detaching prevents a later backward()
        # from walking into the already-released graph of this call
        self.hidden = new_state.detach()
        pred = self.linear(model_out)
        # (seq_len, 1, num_classes) -> (seq_len, num_classes, 1)
        pred = pred.transpose(1, 2)
        return pred

In [None]:
# Move the model to the target device BEFORE building the optimizer, so the optimizer is
# created over the parameters that are actually trained (as advised by the torch.optim docs)
model_GRU = GRU().to(device)
softmax = torch.nn.functional.softmax
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_GRU.parameters(), lr=0.01)
model_GRU

GRU(
  (embedding): Embedding(10, 10)
  (gru): GRU(10, 10)
  (linear): Linear(in_features=10, out_features=10, bias=True)
)

In [None]:
# Train the GRU model one sequence at a time for a fixed number of epochs
num_epochs = 20
accuracies, max_accuracy = [], 0
for epoch in range(num_epochs):
    model_GRU.train()
    print('Epoch: {}'.format(epoch))
    # Dedicated loop names keep the module-level x / y data tensors from being overwritten
    for x_seq, y_seq in train_tensor:
        optimizer.zero_grad()
        # Fresh zero h0 for every sequence, sized from the GRU model itself (not from the
        # separate LSTM `model`) and created on the same device as the data
        model_GRU.hidden = torch.zeros(1, 1, model_GRU.hidden_dim, device=x_seq.device)
        prediction = model_GRU(x_seq)
        # The model output is (seq_len, num_classes, 1), so the targets must be (seq_len, 1)
        y_in = y_seq.unsqueeze(1)
        # Calculate loss
        loss = criterion(prediction, y_in)
        # Backpropagate
        loss.backward()
        # Update weights
        optimizer.step()

    # Loss of the last training sequence of the epoch (not an epoch average)
    print('Loss: {:6.4f}'.format(loss.item()))

Epoch: 0
Loss: 0.2250
Epoch: 1
Loss: 0.0500
Epoch: 2
Loss: 0.0248
Epoch: 3
Loss: 0.0155
Epoch: 4
Loss: 0.0098
Epoch: 5
Loss: 0.0073
Epoch: 6
Loss: 0.0057
Epoch: 7
Loss: 0.0047
Epoch: 8
Loss: 0.0039
Epoch: 9
Loss: 0.0033
Epoch: 10
Loss: 0.0028
Epoch: 11
Loss: 0.0024
Epoch: 12
Loss: 0.0021
Epoch: 13
Loss: 0.0019
Epoch: 14
Loss: 0.0016
Epoch: 15
Loss: 0.0014
Epoch: 16
Loss: 0.0013
Epoch: 17
Loss: 0.0012
Epoch: 18
Loss: 0.0011
Epoch: 19
Loss: 0.0010


In [None]:
# Evaluate the GRU model on the held-out sequences and report per-digit accuracy
with torch.no_grad():
    # Put the GRU model itself (not the separate LSTM `model`) into evaluation mode, once
    model_GRU.eval()
    matches, total = 0, 0
    # Dedicated loop names keep the module-level x / y data tensors from being overwritten
    for x_seq, y_seq in test_tensor:
        # Reset h0 so the state of one test sequence does not leak into the next one
        model_GRU.hidden = torch.zeros(1, 1, model_GRU.hidden_dim, device=x_seq.device)
        scores = model_GRU(x_seq)
        # Softmax is monotonic, so the most probable class is simply the arg-max of the scores
        batch_out = scores.argmax(dim=1)
        # Remove fake dimension
        batch_out = batch_out.squeeze(1)
        # Calculate accuracy
        matches += torch.eq(batch_out, y_seq).sum().item()
        print(batch_out, y_seq)
        total += torch.numel(batch_out)
    accuracy = matches / total
    print('Accuracy: {:4.2f}%'.format(accuracy * 100))

tensor([0, 2, 5, 5, 2, 2, 2, 1, 7, 5]) tensor([0, 2, 5, 5, 2, 2, 2, 1, 7, 5])
tensor([8, 1, 9, 5, 1, 2, 8, 4, 8, 0]) tensor([8, 1, 9, 5, 1, 2, 8, 4, 8, 0])
tensor([3, 5, 2, 2, 7, 1, 9, 8, 0, 0]) tensor([3, 5, 2, 2, 7, 1, 9, 8, 0, 0])
tensor([8, 2, 2, 3, 0, 7, 3, 9, 0, 1]) tensor([8, 2, 2, 3, 0, 7, 3, 9, 0, 1])
tensor([7, 9, 0, 8, 7, 7, 9, 1, 7, 9]) tensor([7, 9, 0, 8, 7, 7, 9, 1, 7, 9])
tensor([9, 2, 2, 5, 9, 8, 3, 5, 4, 0]) tensor([9, 2, 2, 5, 9, 8, 3, 5, 4, 0])
tensor([7, 5, 2, 8, 0, 7, 8, 2, 8, 8]) tensor([7, 5, 2, 8, 0, 7, 8, 2, 8, 8])
tensor([3, 8, 1, 5, 5, 5, 3, 0, 2, 7]) tensor([3, 8, 1, 5, 5, 5, 3, 0, 2, 7])
tensor([3, 0, 8, 5, 7, 5, 0, 9, 0, 1]) tensor([3, 0, 8, 5, 7, 5, 0, 9, 0, 1])
tensor([2, 7, 8, 2, 9, 0, 5, 2, 1, 7]) tensor([2, 7, 8, 2, 9, 0, 5, 2, 1, 7])
tensor([1, 2, 8, 0, 1, 7, 1, 5, 4, 3]) tensor([1, 2, 8, 0, 1, 7, 1, 5, 4, 3])
tensor([8, 7, 5, 4, 3, 2, 0, 5, 7, 1]) tensor([8, 7, 5, 4, 3, 2, 0, 5, 7, 1])
tensor([2, 2, 0, 0, 9, 5, 3, 2, 2, 5]) tensor([2, 2, 0, 0, 9, 5,