In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from sentimentanalyser.utils.data import Path, pad_collate, grandparent_splitter
from sentimentanalyser.utils.data import parent_labeler
from sentimentanalyser.data.text import TextList, SplitData
from sentimentanalyser.utils.files import pickle_dump, pickle_load

In [None]:
from sentimentanalyser.preprocessing.processor import TokenizerProcessor
from sentimentanalyser.preprocessing.processor import NuemericalizeProcessor
from sentimentanalyser.utils.data import read_wiki

In [None]:
from functools import partial

In [None]:
# Location of the IMDB dataset. NOTE(review): absolute, machine-specific path —
# adjust before running elsewhere. Not referenced again in the visible cells.
path_imdb = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb")

In [None]:
# Location of the wikitext-103 files read by the (commented-out) preprocessing
# cell. NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path_wiki = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/wikitext-103")

In [None]:
# Processors handed to `label_by_func(..., proc_x=[proc_tok, proc_num])` in the
# commented-out preprocessing cell: tokenizer first, numericalizer second.
proc_tok = TokenizerProcessor()
proc_num = NuemericalizeProcessor()

In [None]:
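# Preprocessing that writes dumps/variable/ll_wiki.pickle (the file loaded in
# the next cell); currently disabled.
# train = TextList(read_wiki(path_wiki/'train.txt'), path_wiki)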
# valid = TextList(read_wiki(path_wiki/'valid.txt'), path_wiki)

# len(train), len(valid)

# sd_wiki = SplitData(train, valid)

# lm_wiki = sd_wiki.label_by_func(lambda x:0, proc_x=[proc_tok, proc_num])

# pickle_dump(lm_wiki, 'dumps/variable/ll_wiki.pickle')

In [None]:
# Load the preprocessed wikitext data. The path is relative, so it is resolved
# against the notebook's current working directory.
# NOTE(review): presumably `pickle_load` wraps `pickle.load` — only use with trusted files.
lm_wiki = pickle_load('dumps/variable/ll_wiki.pickle')

In [None]:
# Both values are forwarded to `lm_databunchify`; presumably `bs` is the batch
# size and `bptt` the number of tokens per sequence — TODO confirm.
bs, bptt = 32, 70
wiki_data = lm_wiki.lm_databunchify(bs, bptt)

In [None]:
# Vocabulary held by the last processor in the training set's `proc_x` chain.
vocab = lm_wiki.train.proc_x[-1].vocab
# Displayed as the cell output; the same length is later used as the model's vocabulary size.
len(vocab)

In [None]:
# First batch of the training dataloader; `x1` is fed to the models below for
# shape checks (presumably `x1` is the input and `y1` the target — TODO confirm).
x1,y1 = next(iter(wiki_data.train_dl))

## Model

In [None]:
import torch
import torch.nn.functional as F
from torch import nn

In [None]:
from sentimentanalyser.models.regularization import WeightDropout

In [None]:
from sentimentanalyser.utils.dev import print_dims
from sentimentanalyser.utils.models import get_info

In [None]:
from exp.nb_12a import AWD_LSTM, get_language_model, RNNTrainer
from exp.nb_12a import RNNDropout, EmbeddingDropout
from exp.nb_12a import LinearDecoder as LinD

In [None]:
def display_y(y):
    """Recursively report the dimensions of every leaf of a nested output.

    Lists and tuples are walked depth-first, and a dashed separator line is
    printed each time one is entered. Any other object is passed to
    ``print_dims`` under the label ``"tensor"``.
    """
    if not isinstance(y, (list, tuple)):
        print_dims("tensor", y)
        return
    print("------------------")
    for item in y:
        display_y(item)

### Dropouts

In [None]:
def dropout_mask(x, sz, p):
    """Create a rescaled Bernoulli dropout mask matching ``x``'s dtype and device.

    Args:
        x: Tensor used only as a template for dtype/device; its values are not read.
        sz: Iterable of ints giving the shape of the mask.
        p: Probability of zeroing an element.

    Returns:
        A new tensor of shape ``sz`` whose entries are ``0`` with probability
        ``p`` and ``1 / (1 - p)`` otherwise. For ``p == 1`` the mask is all
        zeros.
    """
    shape = tuple(sz)
    keep = 1 - p
    if keep == 0:
        # Everything is dropped; rescaling would compute 0 / 0 and yield NaNs.
        return x.new_zeros(shape)
    return x.new_empty(shape).bernoulli_(keep).div_(keep)

In [None]:
# Toy all-ones tensor used to visualise the dropout mask in the next cells.
ones = torch.ones(4, 4, 4); ones

In [None]:
# The mask has size 1 on dim 1, so it can broadcast against the (4, 4, 4) tensor.
mask = dropout_mask(ones, (4, 1, 4), 0.5); mask

In [None]:
# Broadcasting applies the same mask at every index along dim 1 of `ones`.
ones * mask

In [None]:
class RNNDropout(nn.Module):
    """Dropout that reuses one mask along dimension 1 of a 3-D input.

    The mask is drawn with shape ``(size(0), 1, size(2))`` and broadcast over
    dimension 1. In eval mode, or when the probability is 0, the input object
    is returned untouched.
    """

    def __init__(self, dropout=0.5):
        super().__init__()
        self.dropout = dropout

    def forward(self, inp):
        active = self.training and self.dropout != 0.
        if not active:
            return inp
        # Unpacking enforces a 3-D input; only the outer two sizes shape the mask.
        n_first, _, n_last = inp.size()
        shared_mask = dropout_mask(inp.data, (n_first, 1, n_last), self.dropout)
        return inp * shared_mask

In [None]:
class EmbeddingsWithDropout(nn.Module):
    """Embedding lookup that drops whole rows of the embedding matrix.

    While training with a non-zero probability, a mask of shape
    ``(vocab_length, 1)`` is drawn with ``dropout_mask`` and multiplied into
    the weight matrix, so a dropped row is zero for every occurrence of that
    index in the batch. Otherwise the wrapped layer's weights are used as-is.

    Args:
        embeddings: ``nn.Embedding`` whose weight and lookup options
            (``padding_idx``, ``max_norm``, ``norm_type``,
            ``scale_grad_by_freq``, ``sparse``) are reused.
        embeddings_dropout: Probability of dropping each row.
    """

    def __init__(self, embeddings, embeddings_dropout):
        super().__init__()
        self.embeddings = embeddings
        self.embeddings_dropout = embeddings_dropout
        # Stay ``None`` when the wrapped layer has no padding index.
        # ``F.embedding`` normalises a negative index to
        # ``num_embeddings + idx``, so passing -1 would mark the LAST row as
        # padding and suppress its gradient.
        self.padding_idx = self.embeddings.padding_idx

    def forward(self, words, scale=None):
        """Look up ``words`` in the (possibly masked) embedding matrix.

        Args:
            words: Integer tensor of indices into the embedding matrix.
            scale: Optional multiplier applied to the embedding matrix before
                the lookup; ``None`` leaves it unscaled.

        Returns:
            The result of ``F.embedding`` on the masked/scaled weights.
        """
        weight = self.embeddings.weight
        if self.training and self.embeddings_dropout != 0:
            vocab_length = weight.size(0)
            mask = dropout_mask(weight.data, (vocab_length, 1),
                                self.embeddings_dropout)
            weight = weight * mask

        if scale is not None:
            # Out-of-place multiply so the layer's parameter is never modified.
            weight = weight * scale

        return F.embedding(words, weight, self.padding_idx,
                           self.embeddings.max_norm, self.embeddings.norm_type,
                           self.embeddings.scale_grad_by_freq, self.embeddings.sparse)

### Encoder model

In [None]:
class EncoderAWDModel(nn.Module):
    """Encoder: embedding dropout -> input dropout -> weight-dropped LSTM.

    Args:
        vocab_size: Number of rows in the embedding matrix.
        emb_sz: Embedding dimension (LSTM input size).
        hidden_sz: LSTM hidden size.
        dropout: Probability for ``self.dropout`` (an ``nn.Dropout``), which
            ``forward`` does not apply.
        pad_idx: Padding index handed to ``nn.Embedding``.
        num_layers: Number of stacked LSTM layers.
        emb_dp: Row-dropout probability for ``EmbeddingsWithDropout``.
        inp_dp: Probability for the ``RNNDropout`` applied to the embeddings.
        hidden_dp: ``dropout`` argument of ``nn.LSTM``.
        weight_dp: Second argument of ``WeightDropout`` (presumably the
            weight-dropout probability — TODO confirm against its definition).
    """

    def __init__(self, vocab_size, emb_sz=300, hidden_sz=256, dropout=0.5,
                 pad_idx=1, num_layers=2, emb_dp=0.004, inp_dp=0.05,
                 hidden_dp=0.03, weight_dp=0.04):
        super().__init__()

        self.pad_idx = pad_idx

        self.embeddings = nn.Embedding(vocab_size, emb_sz,
                                       padding_idx=pad_idx)
        self.embeddings_dp = EmbeddingsWithDropout(self.embeddings, emb_dp)
        self.inp_dp = RNNDropout(inp_dp)

        # Kept as an attribute for compatibility; forward() does not use it.
        self.dropout = nn.Dropout(dropout)

        self.rnn = WeightDropout(nn.LSTM(emb_sz,
                                         hidden_sz,
                                         batch_first=True,
                                         dropout=hidden_dp,
                                         num_layers=num_layers,
                                         bidirectional=False), weight_dp)

    def forward(self, xb):
        """Encode a batch of token ids.

        Args:
            xb: Batch-first tensor of token ids — assumed ``(batch, seq)``,
                TODO confirm against the dataloader.

        Returns:
            ``(lstm_out, hidden_st, cell_st)`` where ``lstm_out`` is
            batch-first and has ``xb.size(1)`` steps along dimension 1.
        """
        # NOTE(review): presumably per-row unpadded lengths and a padding mask;
        # the mask is not used here.
        seq_lens, mask = get_info(xb)

        embedded = self.inp_dp(self.embeddings_dp(xb))

        packed = nn.utils.rnn.pack_padded_sequence(embedded, seq_lens,
                                                   batch_first=True)

        packed_out, (hidden_st, cell_st) = self.rnn(packed)

        # Without total_length the output is only as long as the longest
        # sequence in the batch, which can be shorter than xb and would then no
        # longer line up with per-position targets.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(packed_out,
                                                       batch_first=True,
                                                       total_length=xb.size(1))
        return lstm_out, hidden_st, cell_st

### Decoder

In [None]:
class LinearDecoder(nn.Module):
    """Linear projection applied to the encoder output after ``RNNDropout``.

    Args:
        hidden_sz: Input feature size of the linear layer.
        output_sz: Output feature size of the linear layer.
        tie_encoder: Optional module whose ``weight`` is shared with the
            linear layer. Its shape must be ``(output_sz, hidden_sz)``.
        bias: Whether the linear layer has a bias (zero-initialised).
        output_dp: Probability for the ``RNNDropout`` applied before the
            projection.

    Raises:
        ValueError: If ``tie_encoder.weight`` does not match the linear
            layer's weight shape.
    """

    def __init__(self, hidden_sz, output_sz, tie_encoder=None, bias=True,
                 output_dp=0.02):
        super().__init__()
        self.output_dp = RNNDropout(output_dp)
        self.decoder = nn.Linear(hidden_sz, output_sz, bias=bias)
        if bias:
            self.decoder.bias.data.zero_()

        if tie_encoder is not None:
            expected = tuple(self.decoder.weight.shape)
            actual = tuple(tie_encoder.weight.shape)
            if actual != expected:
                # Fail at construction instead of with a matmul error in forward().
                raise ValueError(
                    f"cannot tie weights: tie_encoder.weight has shape {actual}, "
                    f"expected {expected} (output_sz, hidden_sz)")
            self.decoder.weight = tie_encoder.weight
        else:
            nn.init.kaiming_uniform_(self.decoder.weight)

    def forward(self, input):
        """Project the encoder output.

        Args:
            input: 3-tuple ``(outputs, hidden_st, cell_st)``; only ``outputs``
                is used.

        Returns:
            ``(decoded, outputs, raw_outputs)``: the projection, the
            post-dropout activations it was computed from, and the
            activations before dropout.
        """
        raw_outputs, _, _ = input
        outputs = self.output_dp(raw_outputs)
        decoded = self.decoder(outputs)
        return decoded, outputs, raw_outputs

### Language model Classifier

In [None]:
class LMClassifierModel(nn.Module):
    """Language model: an ``EncoderAWDModel`` followed by a ``LinearDecoder``.

    The decoder projects to ``vocab_sz`` outputs and shares its weight with
    the encoder's embedding layer. ``output_sz`` is accepted but not used.
    """

    def __init__(self, vocab_sz, emb_sz=300, hidden_sz=300, output_sz=1, dropout=0.5,
                 pad_idx=1, num_layers=2):
        super().__init__()
        encoder = EncoderAWDModel(vocab_sz, emb_sz, hidden_sz, dropout,
                                  pad_idx, num_layers)
        self.encoder = encoder
        # Weight tying: the decoder reuses the encoder's embedding matrix.
        self.decoder = LinearDecoder(hidden_sz, vocab_sz,
                                     tie_encoder=encoder.embeddings)

    def forward(self, xb):
        # Whatever the encoder returns is passed to the decoder as one argument.
        return self.decoder(self.encoder(xb))

In [None]:
# Only the vocabulary size is supplied; every other hyper-parameter uses its default.
model = LMClassifierModel(len(vocab))

In [None]:
# Forward pass on the sample batch; the decoder returns
# (decoded, outputs, raw_outputs).
y_main = model(x1)

In [None]:
# Print the dimensions of each element of the model output.
display_y(y_main)

### Alternative model (`get_language_model` from `exp.nb_12a`)

In [None]:
from sentimentanalyser.preprocessing.tokens import TOKENS

In [None]:
import numpy as np

In [None]:
# Five values scaled by 0.2 and unpacked as the trailing positional arguments
# of `get_language_model` — presumably its dropout probabilities (TODO confirm
# their order against exp.nb_12a).
dps = np.array([0.1, 0.15, 0.25, 0.02, 0.2]) * 0.2
# Index of the padding token in the vocabulary.
tok_pad = vocab.index(TOKENS.PAD)

In [None]:
# Reference model built by the imported factory. NOTE(review): 300, 356 and 2
# are presumably embedding size, hidden size and layer count — confirm against
# `get_language_model`.
alt_model = get_language_model(len(vocab), 300, 356, 2, tok_pad, *dps)

In [None]:
# Forward pass of the reference model on the same sample batch.
y = alt_model(x1)

In [None]:
# Print the dimensions of the reference model's output for comparison with `y_main`.
display_y(y)

## Flattened loss function and accuracy

In [None]:
def cross_entropy_flat(input, target):
    """Cross-entropy over every position of a batch of sequences.

    Args:
        input: Score tensor that is flattened to ``(bs * sl, -1)``; the last
            dimension is treated as the class dimension.
        target: 2-D tensor of class indices with shape ``(bs, sl)``.

    Returns:
        The value of ``F.cross_entropy`` (default reduction) on the flattened
        tensors.
    """
    bs, sl = target.size()
    # reshape instead of view: sliced or transposed tensors are not contiguous
    # and view() raises on them.
    return F.cross_entropy(input.reshape(bs * sl, -1), target.reshape(bs * sl))

In [None]:
def accuracy_flat(input, target):
    """Apply ``accuracy`` to sequence predictions flattened over batch and position.

    Args:
        input: Score tensor that is flattened to ``(bs * sl, -1)``.
        target: 2-D tensor with shape ``(bs, sl)``.

    Returns:
        Whatever ``accuracy`` returns for the flattened tensors.
    """
    bs, sl = target.size()
    # reshape instead of view: sliced or transposed tensors are not contiguous
    # and view() raises on them.
    return accuracy(input.reshape(bs * sl, -1), target.reshape(bs * sl))

## Training

In [None]:
from sentimentanalyser.callbacks.core import Callback

In [None]:
class RNNCustomTrainer(Callback):
    """Callback for models that return ``(decoded, outputs, raw_outputs)``.

    It strips the prediction tuple down to its first element and adds two
    penalties to the loss:

    * activation regularisation: ``α * mean(out ** 2)`` on ``preds[1]``;
    * temporal activation regularisation: ``β * mean((h[:, 1:] - h[:, :-1]) ** 2)``
      on ``preds[2]``, i.e. differences between consecutive positions of dim 1.

    Args:
        α: Weight of the activation penalty (0 disables it).
        β: Weight of the temporal penalty (0 disables it).
    """

    def __init__(self, α, β):
        self.α = α
        self.β = β

    def after_pred(self):
        # NOTE(review): `self.preds` presumably resolves to the trainer's
        # predictions through the Callback base class — confirm.
        self.out, self.raw_out = self.preds[1], self.preds[2]
        # Leave only the first element so the loss function sees a plain tensor.
        self.trainer.preds = self.trainer.preds[0]

    def after_loss(self):
        if self.α != 0.:
            self.trainer.loss += self.α * self.out.float().pow(2).mean()

        if self.β != 0.:
            h = self.raw_out
            # The difference is taken along dim 1, so that dim needs at least
            # two entries; with a single entry the mean over an empty tensor
            # would add NaN to the loss.
            if h.size(1) > 1:
                self.trainer.loss += self.β * (h[:,1:] - h[:,:-1]).float().pow(2).mean()

    def begin_epoch(self):
        # Call the dataset's batchify() at the start of an epoch when it has one.
        if hasattr(self.dl.dataset, "batchify"):
            self.dl.dataset.batchify()

In [None]:
from sentimentanalyser.optimizers import adam_opt
from sentimentanalyser.utils.metrics import accuracy
from sentimentanalyser.utils.callbacks import combine_scheds, sched_cos, cos_1cycle_anneal
from sentimentanalyser.utils.callbacks import create_phases

In [None]:
def get_basic(Model, vocab):
    """Build the (model, loss function, optimizer) triple for a trainer.

    ``Model`` is called with ``len(vocab)`` as its only argument. The loss is
    ``cross_entropy_flat`` and the optimizer comes from ``adam_opt()`` applied
    to the model's parameters.
    """
    net = Model(len(vocab))
    optimizer = adam_opt()(net.parameters())
    return net, cross_entropy_flat, optimizer

In [None]:
def get_alt():
    """Build the (model, loss function, optimizer) triple for the reference model.

    Reads the module-level ``vocab``, ``tok_pad`` and ``dps``. The model comes
    from ``get_language_model``, the loss is ``cross_entropy_flat`` and the
    optimizer comes from ``adam_opt()`` applied to the model's parameters.
    """
    net = get_language_model(len(vocab), 300, 356, 2, tok_pad, *dps)
    optimizer = adam_opt()(net.parameters())
    return net, cross_entropy_flat, optimizer

In [None]:
# Two cosine segments for the learning rate: 1e-4 -> 1e-3, then 1e-3 -> 3e-5.
# NOTE(review): [0.3, 0.7] is presumably the fraction of training spent in each
# segment — confirm against `combine_scheds`.
sched = combine_scheds([0.3, 0.7], [sched_cos(1e-4, 1e-3), sched_cos(1e-3, 3e-5)])

In [None]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LRFind, CudaCallback, GradientClipping
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamSchedulerCustom
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import RecorderCustom
from sentimentanalyser.training.trainer import Trainer

In [None]:
# Callback factories for `trainer1` (passed as `cb_funcs`). Entries are classes
# or partials with their constructor arguments pre-bound; this list uses the
# notebook's own RNNCustomTrainer.
cbs1 = [partial(AvgStatsCallback, [accuracy_flat]),
       partial(ParamSchedulerCustom,'lr', [sched]),
       partial(GradientClipping, clip=0.1),
       ProgressCallback,
       CudaCallback,
       partial(RNNCustomTrainer, α=2., β=1.),
       RecorderCustom]

In [None]:
# Callback factories for the reference model; only referenced by the
# commented-out `trainer2` cell. Uses RNNTrainer imported from exp.nb_12a.
# NOTE(review): AvgStatsCallback receives the bare function here but a
# one-element list in `cbs1` — confirm which form the callback expects.
cbs2 = [partial(AvgStatsCallback,accuracy_flat),
       CudaCallback, RecorderCustom,
       partial(ParamSchedulerCustom,'lr', [sched]),
       partial(GradientClipping, clip=0.1),
       partial(RNNTrainer, α=2., β=1.),
       ProgressCallback]

In [None]:
# `get_basic` returns (model, loss_func, opt), unpacked here as the positional
# arguments that follow the data.
trainer1 = Trainer(wiki_data, *get_basic(LMClassifierModel, vocab), cb_funcs=cbs1)

In [None]:
# Start training; no arguments are passed, so `fit` runs with its defaults.
trainer1.fit()

In [None]:
#trainer2 = Trainer(wiki_data, *get_alt(), cb_funcs=cbs2)

In [None]:
#trainer2.fit()