In [None]:
# Re-import edited modules automatically before each cell executes
# (autoreload mode 2 covers every imported module).
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
from functools import partial
from sentimentanalyser.data.text import TextList, ItemList, DataBunch, SplitData
from sentimentanalyser.utils.data import Path, listify, random_splitter, compose, parallel, pad_collate, parent_labeler, read_wiki, grandparent_splitter
from sentimentanalyser.data.samplers import SortishSampler, SortSampler
from sentimentanalyser.utils.preprocessing import *
from sentimentanalyser.utils.files import pickle_dump, pickle_load
from sentimentanalyser.preprocessing.processor import TokenizerProcessor, NuemericalizeProcessor, CategoryProcessor

In [None]:
import os

# Root folder of the IMDB dataset (its 'train' and 'test' sub-folders are read
# in a later cell). Set the IMDB_PATH environment variable to point at a local
# copy; the fallback is the path this notebook was originally written against.
path_imdb = Path(os.environ.get(
    "IMDB_PATH",
    "/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb",
))

In [None]:
# Processor instances shared by the labelling step below: `proc_tok` and
# `proc_num` are passed (in that order) as the input processors, `proc_cat`
# as the label processor. `proc_num.vocab_size` is also read as the default
# vocabulary size of the model classes defined later in this notebook.
proc_tok, proc_num, proc_cat = (
    TokenizerProcessor(),
    NuemericalizeProcessor(),
    CategoryProcessor(),
)

In [None]:
# Collect the text files found under the 'train' and 'test' folders.
il_imdb = TextList.from_files(path_imdb, folders=['train', 'test'])

# Split with `grandparent_splitter`, using 'test' as the validation name.
sd_imdb = il_imdb.split_by_func(
    partial(grandparent_splitter, valid_name='test')
)

# Label with `parent_labeler`; inputs go through the tokenizer and then the
# numericalizer, labels through the category processor.
ll_imdb = sd_imdb.label_by_func(
    parent_labeler,
    proc_x=[proc_tok, proc_num],
    proc_y=proc_cat,
)

In [None]:
# Cache the processed, labelled data on disk so it can be reloaded without
# re-running the preprocessing cell above.
# NOTE(review): the relative path presumably requires 'dumps/variable/' to
# exist under the current working directory - confirm in pickle_dump.
pickle_dump(ll_imdb, 'dumps/variable/ll_imdb.pickle')

In [None]:
# Reload the cached labelled data (rebinds `ll_imdb`).
# NOTE(review): the model classes below take their default vocab size from the
# in-memory `proc_num`, not from this pickle - presumably `proc_num` is only
# populated by running the labelling cell first; verify before skipping it.
# Only load pickle files from a trusted source.
ll_imdb = pickle_load('dumps/variable/ll_imdb.pickle')

In [None]:
# Build the data bunch used by every trainer below.
# NOTE(review): 64 is presumably the batch size (the models default to bs=64) - confirm.
imdb_data = ll_imdb.clas_databunchify(64)

In [None]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LR_Find, CudaCallback
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamScheduler
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import Recorder
from sentimentanalyser.training.trainer import Trainer

In [None]:
def accuracy(preds, y):
    """Fraction of rows in ``preds`` whose arg-max class equals the target ``y``.

    ``preds`` is reduced with ``argmax`` over ``dim=1`` and compared
    element-wise with ``y``; the result is a 0-dim float tensor.
    """
    hits = (preds.argmax(dim=1) == y).float()
    return hits.sum() / len(hits)

In [None]:
import torch
import torch.nn.functional as F
from torch import nn

# Model

In [None]:
def get_lens_and_masks(x, pad_id=1):
    """Return the per-row count of non-padding entries and the padding mask.

    Args:
        x: 2-D tensor of token ids (rows are sequences, columns are positions).
        pad_id: id that marks padding.

    Returns:
        ``(lengths, mask)`` where ``mask`` is ``x == pad_id`` and ``lengths`` is
        the row width minus the number of padding entries in each row.
    """
    is_pad = x == pad_id
    n_tokens = x.size(1) - is_pad.sum(1)
    return n_tokens, is_pad

In [None]:
class Model0(nn.Module):
    """Embedding -> (bi)LSTM -> linear classifier over the final hidden states.

    The final hidden state of every layer and direction is concatenated per
    example and fed to the output layer.

    Args:
        vocab_size: number of embedding rows. The default is read from the
            notebook-level ``proc_num`` once, when this class is defined.
        num_layers: number of stacked LSTM layers.
        hidden_size: width of both the embedding and the LSTM hidden state.
        output_size: number of output logits.
        bidirectional: any truthy value makes the LSTM bidirectional.
        padding_idx: token id treated as padding by the embedding and when
            computing sequence lengths.
        bs: accepted for signature compatibility; not used by this class.
    """

    def __init__(self, vocab_size=proc_num.vocab_size, num_layers=2,
                 hidden_size=50, output_size=2, bidirectional=True,
                 padding_idx=1, bs=64):
        super().__init__()

        self.vocab_size, self.hidden_size, self.output_size   = vocab_size, hidden_size, output_size
        self.num_layers, self.bidirectional, self.padding_idx = num_layers, bidirectional, padding_idx
        # Truthiness (not `is True`) so the head is sized the same way nn.LSTM
        # interprets the flag.
        self.bidir = 2 if bidirectional else 1

        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size, padding_idx=self.padding_idx)

        self.dropout   = nn.Dropout()

        self.rnn       = nn.LSTM(self.hidden_size, self.hidden_size,
                                 num_layers=self.num_layers,
                                 batch_first=True,
                                 bidirectional=self.bidirectional)

        # One hidden vector per (layer, direction) is concatenated in forward().
        self.fc        = nn.Linear(self.hidden_size * (self.bidir * self.num_layers), self.output_size)

    def forward(self, texts):
        """Return logits of shape (batch, output_size) for a batch of token ids.

        ``texts`` is a batch-first 2-D tensor of ids padded with ``padding_idx``.
        Rows must be in decreasing length order, as required by the default
        ``pack_padded_sequence`` behaviour.
        """
        text_lengths, _ = get_lens_and_masks(texts, pad_id=self.padding_idx)
        embeded = self.dropout(self.embedding(texts))
        # pack_padded_sequence requires the lengths tensor to live on the CPU,
        # even when the batch itself is on the GPU.
        packed_embed = nn.utils.rnn.pack_padded_sequence(embeded, text_lengths.cpu(), batch_first=True)

        _, (hidden, _) = self.rnn(packed_embed)

        # hidden: (num_layers * num_directions, batch, hidden) -> (batch, all states)
        hidden = self.dropout(torch.cat(tuple(hidden), dim=1))
        return self.fc(hidden)

## Second Model

### Attention

In [None]:
# Pull a single (inputs, targets) batch from the training loader for manual
# inspection. Neither name is referenced again in the cells visible here.
x1,y1 = next(iter(imdb_data.train_dl))

In [None]:
class Attn(nn.Module):
    """Dot-product attention of the (layer-averaged) final state over the outputs.

    ``method`` and ``hidden_size`` are stored but not used by the computation.
    """

    def __init__(self, method=None, hidden_size=None):
        super().__init__()
        self.method = method
        self.hidden_size = hidden_size

    def dot_score(self, hidden, outputs):
        """Score every time step against the layer-averaged final hidden state.

        Args:
            hidden: (num_layers, num_directions, batch, hidden_size).
            outputs: (seq_len, batch, num_directions * hidden_size).

        Returns:
            Tensor of shape (seq_len, batch).
        """
        num_layers, num_dirs, batch, hid = hidden.shape
        # Bring the batch axis in front of the direction axis BEFORE flattening;
        # a bare view() would mix values belonging to different examples.
        per_layer = hidden.permute(0, 2, 1, 3).reshape(num_layers, batch, num_dirs * hid)
        query = per_layer.mean(dim=0)                      # (batch, D*H)
        return torch.sum(query * outputs, dim=2)

    def forward(self, hidden, outputs, mask):
        """Return attention weights of shape (batch, 1, seq_len).

        ``mask`` is True at padding positions (batch, seq_len); those positions
        get exactly zero weight and the remaining weights sum to one.
        """
        scores = self.dot_score(hidden, outputs).t()       # (batch, seq_len)
        # Mask before the softmax so padding takes no probability mass.
        scores = scores.masked_fill(mask[:, :scores.size(1)], float('-inf'))
        return F.softmax(scores, dim=1).unsqueeze(1)

In [None]:
class Model1(nn.Module):
    """Embedding -> (bi)LSTM -> attention-pooled context -> tanh -> classifier.

    Args:
        vocab_size: number of embedding rows. The default is read from the
            notebook-level ``proc_num`` once, when this class is defined.
        num_layers: number of stacked LSTM layers.
        hidden_size: width of both the embedding and the LSTM hidden state.
        output_size: number of output logits.
        bidirectional: any truthy value makes the LSTM bidirectional.
        padding_idx: token id treated as padding by the embedding and when
            computing sequence lengths / the attention mask.
        bs: accepted for signature compatibility; not used by this class.
    """

    def __init__(self, vocab_size=proc_num.vocab_size, num_layers=2,
                 hidden_size=50, output_size=2, bidirectional=True,
                 padding_idx=1, bs=64):
        super().__init__()

        self.vocab_size, self.hidden_size, self.output_size   = vocab_size, hidden_size, output_size
        self.num_layers, self.bidirectional, self.padding_idx = num_layers, bidirectional, padding_idx
        # Truthiness (not `is True`) so layer sizes follow how nn.LSTM reads the flag.
        self.bidir = 2 if bidirectional else 1

        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size, padding_idx=self.padding_idx)

        self.dropout   = nn.Dropout()

        self.rnn       = nn.LSTM(self.hidden_size, self.hidden_size,
                                 num_layers=self.num_layers,
                                 batch_first=True,
                                 bidirectional=self.bidirectional)

        # Projects the attention context (num_directions * hidden) down to hidden.
        self.concat    = nn.Linear(self.hidden_size * self.bidir, self.hidden_size)
        self.fc        = nn.Linear(self.hidden_size, self.output_size)
        self.att       = Attn()

    def forward(self, texts):
        """Return logits of shape (batch, output_size) for a batch of token ids.

        ``texts`` is a batch-first 2-D tensor of ids padded with ``padding_idx``.
        Rows must be in decreasing length order, as required by the default
        ``pack_padded_sequence`` behaviour.
        """
        text_lengths, mask = get_lens_and_masks(texts, pad_id=self.padding_idx)
        embeded = self.dropout(self.embedding(texts))
        # pack_padded_sequence requires the lengths tensor to live on the CPU.
        packed_embed = nn.utils.rnn.pack_padded_sequence(embeded, text_lengths.cpu(), batch_first=True)

        packed_output, (hidden, _) = self.rnn(packed_embed)

        # Time-major outputs (seq_len, batch, D*H); total_length keeps seq_len
        # equal to the input width so it lines up with `mask`.
        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_output, total_length=texts.size(1))

        # Split h_n into (layers, directions, batch, hidden) for the attention query.
        query = hidden.view(self.num_layers, self.bidir, texts.size(0), self.hidden_size)
        att_energies = self.att(query, outputs, mask)               # (batch, 1, seq_len)
        context = att_energies.bmm(outputs.transpose(0, 1)).squeeze(1)  # (batch, D*H)
        context = torch.tanh(self.concat(context))
        return self.fc(self.dropout(context))

## Third Model

In [None]:
def print_dims(name, tensor):
    """Debug helper: print the ``shape`` of ``tensor`` labelled with ``name``."""
    dims = tensor.shape
    print(f'size of {name} is {dims}')

In [None]:
class Attn2(nn.Module):
    """Attention pooling with a learned context vector.

    Each time step is projected with ``tanh(fc(.))``, scored against the
    learned ``weight`` vector, and the softmax of the scores is used to take a
    weighted sum of the encoder outputs.

    Args:
        method: stored but not used by the computation.
        hidden_size: feature size of the encoder outputs (last axis of ``enc``).
        context_size: size of the projection and of the context vector.
    """

    def __init__(self, method=None, hidden_size=None, context_size=None):
        super().__init__()
        self.method, self.hidden_size, self.context_size = method, hidden_size, context_size

        self.fc     = nn.Linear(self.hidden_size, self.context_size)
        self.weight = nn.Parameter(torch.randn(self.context_size))

    def forward(self, enc, mask):
        """Pool ``enc`` (batch, seq_len, hidden_size) into (batch, hidden_size).

        ``mask`` is True at padding positions; it may be wider than ``enc``
        along the time axis (extra columns are ignored) or ``None`` to attend
        over every position.
        """
        scores = torch.tanh(self.fc(enc)).matmul(self.weight)       # (batch, seq_len)
        if mask is not None:
            # Exclude padding before the softmax; otherwise padded steps take
            # probability mass away from the real tokens.
            scores = scores.masked_fill(mask[:, :scores.size(1)], float('-inf'))
        weights = torch.softmax(scores, dim=1).unsqueeze(1)         # (batch, 1, seq_len)
        return weights.matmul(enc).squeeze(1)

In [None]:
class Model2(nn.Module):
    """Embedding -> (bi)LSTM -> ``Attn2`` pooling -> linear classifier.

    Args:
        vocab_size: number of embedding rows. The default is read from the
            notebook-level ``proc_num`` once, when this class is defined.
        hidden_size: width of both the embedding and the LSTM hidden state.
        context_size: size of the attention context vector.
        output_size: number of output logits.
        num_layers: number of stacked LSTM layers.
        bidirectional: any truthy value makes the LSTM bidirectional.
        padding_idx: token id treated as padding.
        bs: stored on the instance; not otherwise used by this class.
        dropout: drop probability of the ``self.dropout`` module.
    """

    def __init__(self, vocab_size=proc_num.vocab_size, hidden_size=50,
                 context_size=50, output_size=2, num_layers=2,
                 bidirectional=True, padding_idx=1, bs=64, dropout=0.5):
        super().__init__()
        self.vocab_size, self.hidden_size       = vocab_size, hidden_size
        self.context_size, self.output_size     = context_size, output_size
        self.num_layers, self.bidirectional     = num_layers, bidirectional
        self.padding_idx, self.bs               = padding_idx, bs

        self.bidir = 2 if self.bidirectional else 1

        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size,
                                      padding_idx=self.padding_idx)
        # Honour the `dropout` argument instead of always using the 0.5 default.
        # NOTE(review): this module is constructed but never applied in
        # forward(); confirm whether that is intentional.
        self.dropout   = nn.Dropout(p=dropout)
        self.rnn       = nn.LSTM(self.hidden_size, self.hidden_size,
                                 num_layers=self.num_layers,
                                 batch_first=True,
                                 bidirectional=self.bidirectional)
        self.attn      = Attn2('3', self.bidir*self.hidden_size, self.context_size)
        self.fc        = nn.Linear(self.bidir*self.hidden_size, self.output_size)

    def forward(self, texts):
        """Return logits of shape (batch, output_size) for a batch of token ids.

        ``texts`` is a batch-first 2-D tensor of ids padded with ``padding_idx``.
        Rows must be in decreasing length order, as required by the default
        ``pack_padded_sequence`` behaviour.
        """
        text_lengths, mask = get_lens_and_masks(texts, pad_id=self.padding_idx)

        embedded       = self.embedding(texts)
        # pack_padded_sequence requires the lengths tensor to live on the CPU.
        packed_embeded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.cpu(),
                                                           batch_first=True)
        enc_packed, _ = self.rnn(packed_embeded)

        # total_length keeps the time axis equal to the input width so that
        # `enc` and `mask` always line up.
        enc, _ = nn.utils.rnn.pad_packed_sequence(enc_packed, batch_first=True,
                                                  total_length=texts.size(1))
        res    = self.attn(enc, mask)
        return self.fc(res)

### Third Model: Alternative Implementation

In [None]:
class Network(nn.Module):
    """Bidirectional LSTM classifier with word-level attention pooling.

    Args:
        vocab_size: number of embedding rows. The default is read from the
            notebook-level ``proc_num`` once, when this class is defined.
        embed_d: embedding width (LSTM input size).
        hidden_d: LSTM hidden size per direction.
        output_d: number of output logits.
        context_d: size of the attention projection / context vector.
        dropout: drop probability for ``self.dropout`` and for the LSTM's
            inter-layer dropout.
        pad_idx: token id treated as padding by the embedding and when
            computing sequence lengths / the attention mask.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size=proc_num.vocab_size, embed_d=50, hidden_d=50, output_d=2, context_d=50,
                 dropout=0.5, pad_idx=1, num_layers=2):
        super().__init__()

        self.hidden_d = hidden_d
        # Kept so forward() derives lengths and mask from the same pad id as
        # the embedding.
        self.pad_idx = pad_idx

        # NOTE(review): constructed but never applied in forward(); confirm intent.
        self.dropout = nn.Dropout(p=dropout)

        self.embedding = nn.Embedding(vocab_size, embed_d,
                                      padding_idx=pad_idx)

        self.lstm = nn.LSTM(bidirectional=True, num_layers=num_layers,
                            input_size=embed_d, hidden_size=hidden_d,
                            batch_first=True, dropout=dropout)

        ## Word-level hierarchical attention:
        self.ui = nn.Linear(2*hidden_d, context_d)
        self.uw = nn.Parameter(torch.randn(context_d))

        ## Output:
        self.fc = nn.Linear(2*hidden_d, output_d)

    def forward(self, x):
        """Return logits of shape (B, OutD) for a batch-first tensor of token ids.

        Rows of ``x`` must be in decreasing length order, as required by the
        default ``pack_padded_sequence`` behaviour.
        """
        seqlens, mask = get_lens_and_masks(x, pad_id=self.pad_idx)

        embeds = self.embedding(x)  # B X T X EmbD
        # pack_padded_sequence requires the lengths tensor to live on the CPU.
        packed_embeds = nn.utils.rnn.pack_padded_sequence(embeds, seqlens.cpu(),
                                                          batch_first=True)

        enc_packed, _ = self.lstm(packed_embeds)
        # total_length keeps T equal to the input width so `enc` matches `mask`.
        enc, _ = nn.utils.rnn.pad_packed_sequence(enc_packed,
                                                  batch_first=True,
                                                  total_length=x.size(1))
            # B X T X HdD*2

        ## Word-level hierarchical attention:
        u_it = torch.tanh(self.ui(enc))  # B X T X CtD
        # Exclude padding before the softmax so it receives zero weight.
        scores = u_it.matmul(self.uw).masked_fill(mask, float('-inf'))  # B X T
        weights = torch.softmax(scores, dim=1).unsqueeze(1)
            # B X 1 X T
        sent = weights.matmul(enc).squeeze(1)  # B X HdD*2

        logits = self.fc(sent)  # B X OutD

        return logits

# Training

In [None]:
def get_basic0():
    """Build a 4-layer ``Model0`` with cross-entropy loss and Adam (lr=1e-3).

    Returns:
        ``(model, loss_func, optimizer)``.
    """
    net = Model0(num_layers=4)
    criterion = nn.CrossEntropyLoss()
    return net, criterion, torch.optim.Adam(net.parameters(), lr=1e-3)

In [None]:
def get_basic1():
    """Build a 4-layer ``Model1`` with cross-entropy loss and Adam (lr=1e-3).

    Returns:
        ``(model, loss_func, optimizer)``.
    """
    net = Model1(num_layers=4)
    criterion = nn.CrossEntropyLoss()
    return net, criterion, torch.optim.Adam(net.parameters(), lr=1e-3)

In [None]:
def get_basic2():
    """Build a 4-layer ``Model2`` with cross-entropy loss and Adam (lr=1e-3).

    Returns:
        ``(model, loss_func, optimizer)``.
    """
    net = Model2(num_layers=4)
    criterion = nn.CrossEntropyLoss()
    return net, criterion, torch.optim.Adam(net.parameters(), lr=1e-3)

In [None]:
# Learning-rate schedules, each made of two `sched_cos` segments joined by
# `combine_scheds` with the weights [0.3, 0.7].
# NOTE(review): the weights are presumably the fraction of training spent in
# each segment - confirm in combine_scheds.
sched = combine_scheds(
    [0.3, 0.7],
    [sched_cos(3e-3, 1e-2), sched_cos(1e-2, 3e-5)],
)
# Variant with larger values; no trainer in the cells visible here uses it.
sched_fast = combine_scheds(
    [0.3, 0.7],
    [sched_cos(3e-2, 5e-1), sched_cos(5e-1, 1e-3)],
)

In [None]:
# Trainer for Model0: the (model, loss, optimizer) triple from get_basic0() is
# unpacked positionally after the data; the callback factories are passed via
# `cb_funcs`.
trainer0 = Trainer(
    imdb_data,
    *get_basic0(),
    cb_funcs=[
        partial(AvgStatsCallback, [accuracy]),
        partial(ParamScheduler, 'lr', sched),
        ProgressCallback,
        CudaCallback,
        Recorder,
    ],
)

In [None]:
# Trainer for Model1: the (model, loss, optimizer) triple from get_basic1() is
# unpacked positionally after the data; the callback factories are passed via
# `cb_funcs`.
trainer1 = Trainer(
    imdb_data,
    *get_basic1(),
    cb_funcs=[
        partial(AvgStatsCallback, [accuracy]),
        partial(ParamScheduler, 'lr', sched),
        ProgressCallback,
        CudaCallback,
        Recorder,
    ],
)

In [None]:
# Trainer for Model2: the (model, loss, optimizer) triple from get_basic2() is
# unpacked positionally after the data; the callback factories are passed via
# `cb_funcs`.
trainer2 = Trainer(
    imdb_data,
    *get_basic2(),
    cb_funcs=[
        partial(AvgStatsCallback, [accuracy]),
        partial(ParamScheduler, 'lr', sched),
        ProgressCallback,
        CudaCallback,
        Recorder,
    ],
)

## Compare Models

### Model0

In [None]:
# Train Model0 using the trainer's default fit arguments.
trainer0.fit()

In [None]:
# Train Model1 using the trainer's default fit arguments.
trainer1.fit()

In [None]:
# Train Model2 for an explicit 10 epochs.
# NOTE(review): the other two runs use fit()'s default epoch count; the results
# are only directly comparable if that default is also 10 - confirm.
trainer2.fit(epochs=10)