![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)

# <center> Deep Learning Methods </center>
## <center> Lecture 10 - RNN </center>
### <center> Advanced RNN </center>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/DeepLearningMethods/10_RNN/MainAdvancedRNN_Solution.ipynb)

In [1]:
#-- Wide screen: stretch the notebook container to 80% of the browser width.
#-- `display` / `HTML` are imported from the public `IPython.display` module
#-- (importing them from `IPython.core.display` is deprecated).
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [2]:
#-- Auto reload: load IPython's `autoreload` extension (mode 2), so that edits
#-- to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import numpy             as np
import matplotlib.pyplot as plt

import torch
import torch.nn    as nn
import torch.optim as optim
import torchsummary
import torchtext

from torchtext.data.utils import get_tokenizer
from torchtext.vocab      import Vocab
from torchtext.datasets   import IMDB

### Exercise:
* Use the advanced recurrent units (e.g., GRU) to get above 85% accuracy on the sentiment analysis task.  
* (Keep only the sequences with fewer than 150 words.)

In [34]:
#-- Root folder handed to the IMDB dataset, and the `basic_english` tokenizer
dirPath    = '../../data'
oTokenizer = get_tokenizer('basic_english')

#-- IMDB reviews: the call returns the train and test parts
(trainIter, testIter) = IMDB(root=dirPath)

In [35]:
def NumWords(line):
    """Return the number of whitespace-separated words in `line`."""
    lWords = line.split()
    return len(lWords)

#-- Keep only the reviews with fewer than `maxLength` words (train first, then test)
maxLength = 150
lTrainSet, lTestSet = (
    [(label, line) for (label, line) in oIter if NumWords(line) < maxLength]
    for oIter in (trainIter, testIter)
)

In [36]:
#-- Number of samples: the train / test parts as loaded, then the length-filtered train / test lists
len(trainIter), len(testIter), len(lTrainSet), len(lTestSet)

(25000, 25000, 9746, 9932)

In [37]:
import random

#-- ANSI escape sequences used to color the printed text
red   = '\x1b[31m'
green = '\x1b[32m'
end   = '\x1b[0m'

#-- Print 3 random training reviews ('neg' in red, anything else in green),
#-- each followed by its tokenized form
for _ in range(3):
    label, line = random.choice(lTrainSet)
    color       = green if label != 'neg' else red

    print('\n'.join(['========================================================================================================='] * 2))
    print(f'{color}{line}{end}')
    print('--Tokenized:---------------------------')
    print(oTokenizer(line))

[32mOften laugh out loud, sometimes sad story of 2 working divorced guys -- Lemmon a neurotic clean "house husband" and Matthau a slob sportswriter -- who decide to live together to cut down on expenses. <br /><br />Nicely photographed and directed. The script is very barbed -- that is, there's always more than one side to almost every line. Particularly funny scene involves 2 british sisters (Evans and Shelley) who seem amused by everything anyone says, but when Lemmon busts out his photos of kids and, yes, ex-wife-to-be, he has the girls sobbing along with him before Matthau can show up with the promised drinks!<br /><br />Very entertaining.[0m
--Tokenized:---------------------------
['often', 'laugh', 'out', 'loud', ',', 'sometimes', 'sad', 'story', 'of', '2', 'working', 'divorced', 'guys', '--', 'lemmon', 'a', 'neurotic', 'clean', 'house', 'husband', 'and', 'matthau', 'a', 'slob', 'sportswriter', '--', 'who', 'decide', 'to', 'live', 'together', 'to', 'cut', 'down', 'on', 'expense

In [38]:
from collections import Counter

#-- Count the tokens of the (length-filtered) training set.
#-- The sample index is not needed, so iterate over the samples directly.
oCounter = Counter()
for (label, line) in lTrainSet:
    oCounter.update(oTokenizer(line))

#-- Build the vocabulary from the token counts (`min_freq=10`) with four special tokens
oVocab = Vocab(oCounter, min_freq=10, specials=('<unk>', '<BOS>', '<EOS>', '<pad>'))
oVocab

<torchtext.vocab.Vocab at 0x23572dd8df0>

In [39]:
len(oVocab)

6321

In [41]:
def TextTransform(line):
    """Convert a text line into a 1D tensor of vocabulary indices.

    The line is tokenized with `oTokenizer`, each token is looked up in `oVocab`,
    and the result is wrapped with the '<BOS>' and '<EOS>' indices.
    """
    lTokenIdx = [oVocab[token] for token in oTokenizer(line)]
    return torch.tensor([oVocab['<BOS>'], *lTokenIdx, oVocab['<EOS>']])

def LabelTransform(label):
    """Map the label 'pos' to 1 and any other label to 0."""
    return int(label == 'pos')

#-- Sanity check: print a short sentence and the index tensor `TextTransform` produces for it
line = 'Hello World!'
vIdx = TextTransform(line)
print('Input :', line)
print('Output:', vIdx)

Input : Hello World!
Output: tensor([   1, 3630,  212,   25,    2])


In [42]:
from torch.utils.data   import DataLoader
from torch.nn.utils.rnn import pack_sequence

def CollateBatch(lBatch):
    """Collate a list of `(label, line)` samples into one batch.

    Returns:
        mPackX: the `TextTransform` index sequences, packed with `pack_sequence`
                (`enforce_sorted=False`, so the samples need not be sorted by length).
        vY:     a 1D tensor with one `LabelTransform` target per sample.
    """
    #-- Transform every sample once (label first, then text)
    lPairs = [(LabelTransform(label), TextTransform(line)) for (label, line) in lBatch]

    vY     = torch.tensor([float(y) for (y, _) in lPairs])
    mPackX = pack_sequence([vX for (_, vX) in lPairs], enforce_sorted=False)

    return mPackX, vY

In [43]:
batchSize = 128

#-- Both loaders shuffle and batch with `CollateBatch`; the test loader uses twice the train batch size
oTrainDL = DataLoader(dataset=lTrainSet, batch_size=batchSize,     shuffle=True, collate_fn=CollateBatch)
oTestDL  = DataLoader(dataset=lTestSet,  batch_size=batchSize * 2, shuffle=True, collate_fn=CollateBatch)

In [44]:
from torch.nn.utils.rnn import PackedSequence

def PackedAs(mX, mPack):
    """Wrap `mX` in a `PackedSequence` that reuses the packing layout of `mPack`.

    Args:
        mX:    The new packed data (e.g., the embedding of `mPack.data`).
        mPack: The `PackedSequence` whose layout is reused.

    Returns:
        A `PackedSequence` holding `mX` together with the `batch_sizes`,
        `sorted_indices` and `unsorted_indices` of `mPack`.
    """
    #-- Forward `sorted_indices` as well (instead of `None`): keeping only
    #-- `unsorted_indices` yields an inconsistent packed sequence, which matters to any
    #-- consumer that reads `sorted_indices` (in PyTorch's RNN modules this presumably
    #-- includes reordering a user-supplied initial hidden state).
    return PackedSequence(mX, mPack.batch_sizes, mPack.sorted_indices, mPack.unsorted_indices)

V = len(oVocab)  #-- vocabulary size
D = 24           #-- embedding dimension
H = 16           #-- GRU hidden size

class RNN(nn.Module):
    """Sequence classifier: embedding -> single-layer bidirectional GRU -> linear layer.

    `forward` takes a `PackedSequence` of token indices and returns one value
    per sequence (a 1D tensor).
    """
    def __init__(self):
        super().__init__()
        self.oEmbedding = nn.Embedding(num_embeddings=V, embedding_dim=D, padding_idx=oVocab['<pad>'])
        self.oGRU       = nn.GRU      (input_size=D, hidden_size=H, num_layers=1, bidirectional=True)
        self.oFC        = nn.Linear   (in_features=2*H, out_features=1)

    def forward(self, mPackX):
        #-- Embed the packed token indices:      (N*T,)    -> (N*T, D)
        mEmbed     = self.oEmbedding(mPackX.data)
        #-- Re-wrap using the layout of `mPackX`, keep only the final hidden states: (2, N, H)
        _, mHidden = self.oGRU(PackedAs(mEmbed, mPackX))
        #-- Concatenate the two final states:    (2, N, H) -> (N, 2*H)
        mConcat    = torch.cat((mHidden[-1], mHidden[-2]), dim=1)
        #-- Linear layer:                        (N, 2*H)  -> (N, 1)
        mOut       = self.oFC(mConcat)
        return mOut[:, 0]

In [45]:
#-- Smoke test: pass one training batch through a freshly constructed model and show the output shape
mX, vY = next(iter(oTrainDL))
RNN()(mX).shape

torch.Size([128])

In [46]:
#-- Use the first CUDA device when available, otherwise the CPU
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

#-- Binary cross-entropy computed directly on logits
Loss = nn.BCEWithLogitsLoss()

In [None]:
from torch.optim                    import lr_scheduler
from DeepLearningFramework.Metric   import BinaryAcuuracy
from DeepLearningFramework.Training import TrainClassficationModel

#-- Training configuration
nEpochs       = 50
#-- Total number of scheduler steps: (number of epochs) x (batches per epoch)
#-- (assumes `TrainClassficationModel` steps the scheduler once per batch -- TODO confirm)
nIter         = nEpochs * len(oTrainDL)
oModel        = RNN().to(DEVICE)
#-- NOTE(review): `lr=2e-5` is presumably overridden by the OneCycleLR schedule below
#-- (which is driven by `max_lr`) -- confirm against the PyTorch documentation
oOptim        = optim.AdamW(oModel.parameters(), lr=2e-5, betas=(0.8, 0.9), weight_decay=5)
oScheduler    = lr_scheduler.OneCycleLR(oOptim, max_lr=2e-3, total_steps=nIter)
#-- The trailing `;` suppresses the cell's output value; the result is kept in `lHistory`
lHistory      = TrainClassficationModel(oModel, oTrainDL, oTestDL, Loss, BinaryAcuuracy, nEpochs, oOptim, oScheduler);

Epoch 000: | Train loss: 0.69528 | Val loss: 0.69378 | Train Metric: 0.4928 | Val Metric: 0.4986 | epoch time:  4.828 | <-- Checkpoint!
Epoch 001: | Train loss: 0.69370 | Val loss: 0.69248 | Train Metric: 0.5037 | Val Metric: 0.5124 | epoch time:  4.745 | <-- Checkpoint!
Epoch 002: | Train loss: 0.69232 | Val loss: 0.69167 | Train Metric: 0.5182 | Val Metric: 0.5273 | epoch time:  5.197 | <-- Checkpoint!
Epoch 003: | Train loss: 0.69110 | Val loss: 0.69044 | Train Metric: 0.5368 | Val Metric: 0.5437 | epoch time:  5.162 | <-- Checkpoint!
Epoch 004: | Train loss: 0.69010 | Val loss: 0.68991 | Train Metric: 0.5507 | Val Metric: 0.5537 | epoch time:  5.373 | <-- Checkpoint!
Epoch 005: | Train loss: 0.68932 | Val loss: 0.68896 | Train Metric: 0.5624 | Val Metric: 0.5724 | epoch time:  5.643 | <-- Checkpoint!
Epoch 006: | Train loss: 0.68751 | Val loss: 0.68635 | Train Metric: 0.5840 | Val Metric: 0.5876 | epoch time:  6.075 | <-- Checkpoint!
Epoch 007: | Train loss: 0.67989 | Val loss: 0.6

In [None]:
from DeepLearningFramework.Auxiliary import PlotHistory

#-- Visualize the training history returned by `TrainClassficationModel`
#-- (presumably the loss / metric curves per epoch -- TODO confirm against `PlotHistory`)
PlotHistory(lHistory)