<a href="https://colab.research.google.com/github/CharlesPoletowin/YCBS-273/blob/master/Lecture7_RNN_intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import time
from sklearn.metrics import accuracy_score

This notebook was inspired by https://github.com/bentrevett/pytorch-sentiment-analysis. Many thanks to the authors!

# Data setup

In [0]:
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

def get_data(seq_len, inp_dim, device, data_size=25000, batch_size=64):
  """Build a synthetic "recall the first token" classification dataset.

  Each sample is a sequence of ``seq_len`` random integer tokens in
  ``[0, inp_dim)``; its label is the first token of that sequence.

  Args:
    seq_len: number of tokens per sequence.
    inp_dim: vocabulary size (exclusive upper bound of the token values).
    device: device on which the tensors are created.
    data_size: total number of sequences; they are split 70% / 15% / 15%
      into train / validation / test, in generation order.
    batch_size: batch size shared by the three loaders.

  Returns:
    ``(train_data_loader, valid_data_loader, test_data_loader)``. Every batch
    is an ``(inputs, labels)`` pair with ``inputs`` shaped
    ``[batch, seq_len]`` and ``labels`` shaped ``[batch]``.
  """
  data = torch.randint(low=0, high=inp_dim, size=(data_size, seq_len), device=device)
  # Tokens are non-negative by construction, so the first column is the label as-is.
  labels = data[:, 0]

  # Split boundaries (70% and 85% of the samples), computed once.
  train_end = int(0.7 * data_size)
  valid_end = int(0.85 * data_size)

  train_data = TensorDataset(data[:train_end], labels[:train_end])
  valid_data = TensorDataset(data[train_end:valid_end], labels[train_end:valid_end])
  test_data = TensorDataset(data[valid_end:], labels[valid_end:])

  train_data_loader = DataLoader(train_data, batch_size=batch_size)
  valid_data_loader = DataLoader(valid_data, batch_size=batch_size)
  test_data_loader = DataLoader(test_data, batch_size=batch_size)

  return train_data_loader, valid_data_loader, test_data_loader

# Model definition

In [0]:
import torch.nn as nn

class RNN(nn.Module):
    """Token classifier: embedding -> vanilla RNN -> linear head.

    The linear head reads the RNN's final hidden state, so one vector of
    ``output_dim`` scores is produced per input sequence.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        """Create the three sub-modules.

        Args:
            input_dim: number of distinct token ids (embedding table rows).
            embedding_dim: size of each token embedding.
            hidden_dim: size of the RNN hidden state.
            output_dim: number of scores produced per sequence.
        """
        super().__init__()

        # Token id -> dense vector.
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        # Recurrent encoder over the embedded sequence.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim)
        # Maps the final hidden state to the output scores.
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text):
        """Score a batch of token sequences.

        Args:
            text: token ids laid out as [seq len, batch size].

        Returns:
            Scores laid out as [batch size, output dim].
        """
        # token_vectors = [seq len, batch size, emb dim]
        token_vectors = self.embedding(text)

        # all_states  = [seq len, batch size, hid dim]
        # final_state = [1, batch size, hid dim]
        all_states, final_state = self.rnn(token_vectors)

        last_hidden = final_state.squeeze(0)

        # Sanity check: the last time step of the output sequence is the
        # final hidden state.
        assert torch.equal(all_states[-1, :, :], last_hidden)

        return self.fc(last_hidden)

In [0]:
def count_parameters(model):
    """Return how many scalar values ``model``'s trainable parameters hold.

    Only parameters whose ``requires_grad`` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total

# Loss function

In [0]:
import torch.nn.functional as F

# Loss used throughout the notebook: it is called as
# `loss_func(predictions, label)` on the model's output scores and the
# integer labels, and is the object handed to evaluate() and train_model().
loss_func = F.cross_entropy

# Optimizer

In [0]:
import torch.optim as optim

In [0]:
def evaluate(model, data_iterator, loss_func):
    """Measure the accuracy and the mean batch loss of ``model`` on a dataset.

    The model is switched to eval mode and is left in that mode on return.
    No gradients are tracked while evaluating.

    Args:
        model: callable module; it is fed each input batch transposed
            (``inp.t()``) and must return one row of class scores per sample.
        data_iterator: iterable of ``(inp, label)`` batches. It does not need
            to support ``len()``.
        loss_func: callable ``loss_func(predictions, label)`` returning a
            scalar tensor.

    Returns:
        ``(accuracy, loss)`` in that order. ``accuracy`` is the number of
        correctly classified samples divided by the number of samples, so a
        short final batch is weighted by its actual size. ``loss`` is the
        average of the per-batch loss values. Both are ``nan`` when
        ``data_iterator`` yields no samples.
    """
    batch_loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_samples = 0

    model.eval()

    with torch.no_grad():

        for inp, label in data_iterator:

            # Batches arrive as [batch, seq len]; the model takes [seq len, batch].
            predictions = model(inp.t()).squeeze(1)

            batch_loss_sum += loss_func(predictions, label).item()
            n_batches += 1

            # Count hits on the tensors' own device instead of copying every
            # batch to the host.
            predicted_class = torch.argmax(predictions, dim=1)
            n_correct += (predicted_class == label).sum().item()
            n_samples += label.numel()

    if n_samples == 0:
        # Nothing was evaluated: report "undefined" rather than dividing by zero.
        return float('nan'), float('nan')

    return n_correct / n_samples, batch_loss_sum / n_batches

# Training

In [0]:
def train_model(model, train_data, valid_data, loss_func, optimizer, epochs=5):
  """Train ``model`` for ``epochs`` passes over ``train_data``, printing progress.

  After every epoch the model is scored with ``evaluate`` on both
  ``train_data`` and ``valid_data``. Two lines are then printed: the number
  of training batches, and a summary with the mean training batch loss, the
  two accuracies, and the training / evaluation wall-clock times.

  Args:
    model: module fed each input batch transposed (``inp.t()``).
    train_data: sized iterable of ``(inp, label)`` training batches.
    valid_data: iterable of ``(inp, label)`` validation batches.
    loss_func: callable ``loss_func(predictions, label)`` returning a scalar
      tensor that supports ``backward()``.
    optimizer: optimizer already bound to ``model``'s parameters.
    epochs: number of passes over ``train_data``.

  Returns:
    None.
  """
  for epoch in range(epochs):

    # evaluate() leaves the model in eval mode, so re-enable training mode
    # at the start of every epoch.
    model.train()
    epoch_loss = 0

    tic = time.time()
    for inp, label in train_data:

      # Start each step from clean gradients so that nothing accumulated on
      # these parameters earlier can leak into this update.
      optimizer.zero_grad()

      predictions = model(inp.t()).squeeze(1)
      loss = loss_func(predictions, label)
      loss.backward()
      optimizer.step()

      epoch_loss += loss.item()

    toc = time.time()

    # evaluate() returns (accuracy, loss); only the accuracy is reported.
    train_acc, _ = evaluate(model, train_data, loss_func)
    acc, _ = evaluate(model, valid_data, loss_func)
    toe = time.time()
    # Number of training batches per epoch.
    print(len(train_data))
    print('Loss at epoch %d : %f, train acc : %f, valid acc : %f | train time : %d sec, eval time : %d sec' % (epoch, epoch_loss / len(train_data), train_acc, acc, toc-tic, toe - toc))

In [0]:
# Seed PyTorch before anything random happens so that the synthetic data and
# the initial weights are repeatable from one run to the next.
SEED = 0
torch.manual_seed(SEED)

SEQ_LEN = 10
INPUT_DIM = 50
# The label is the first input token, so there are as many classes as tokens.
OUTPUT_DIM = INPUT_DIM

EMBEDDING_DIM = 32
HIDDEN_DIM = 256

# Not passed to the RNN constructor call below; they appear only in its
# trailing comment.
N_LAYERS = 1
BIDIRECTIONAL = False
DROPOUT = 0

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data is created directly on `device`; the model is moved there to match.
train_data_loader, valid_data_loader, test_data_loader = get_data(SEQ_LEN, INPUT_DIM, device=device, data_size=100000)
model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM) #, N_LAYERS, BIDIRECTIONAL, DROPOUT)
model = model.to(device)

optimizer = optim.Adam(model.parameters(), weight_decay=0.00001)

print(f'The model has {count_parameters(model):,} trainable parameters')

train_model(model, train_data_loader, valid_data_loader, loss_func, optimizer, epochs=15)

The model has 88,690 trainable parameters
1094
Loss at epoch 0 : 0.448821, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 1 : 0.001877, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 2 : 0.000670, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 3 : 0.000333, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 4 : 0.032066, train acc : 0.999986, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 5 : 0.003594, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 6 : 0.000300, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 7 : 0.000162, train acc : 1.000000, valid acc : 1.000000 | train time : 4 sec, eval time : 2 sec
1094
Loss at epoch 8 : 0.00011