In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from sentimentanalyser.utils.data import Path, pad_collate, grandparent_splitter
from sentimentanalyser.utils.data import parent_labeler
from sentimentanalyser.data.text import TextList, SplitData
from sentimentanalyser.utils.files import pickle_dump, pickle_load

In [3]:
from sentimentanalyser.preprocessing.processor import TokenizerProcessor
from sentimentanalyser.preprocessing.processor import NuemericalizeProcessor
from sentimentanalyser.utils.data import read_wiki

In [4]:
import numpy as np
from functools import partial

In [5]:
path_imdb = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb")

In [6]:
path_wiki = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/wikitext-103")

In [7]:
path_cache = Path('/home/anukoolpurohit/Documents/AnukoolPurohit/Models/WordEmbeddings')

In [8]:
from torchtext import vocab

In [9]:
ft_eng = vocab.FastText(cache=path_cache)

In [10]:
proc_tok = TokenizerProcessor()
proc_num = NuemericalizeProcessor()

In [11]:
# train = TextList(read_wiki(path_wiki/'train.txt'), path_wiki)
# valid = TextList(read_wiki(path_wiki/'valid.txt'), path_wiki)

# len(train), len(valid)

# sd_wiki = SplitData(train, valid)

# lm_wiki = sd_wiki.label_by_func(lambda x:0, proc_x=[proc_tok, proc_num])

# pickle_dump(lm_wiki, 'dumps/variable/ll_wiki.pickle')

In [12]:
lm_wiki = pickle_load('dumps/variable/ll_wiki.pickle')

In [13]:
bs, bptt = 32, 70
wiki_data = lm_wiki.lm_databunchify(bs, bptt)

In [14]:
vocab = lm_wiki.train.proc_x[-1].vocab
len(vocab)

60001

In [15]:
x1,y1 = next(iter(wiki_data.train_dl))

## Model

In [16]:
import torch
import torch.nn.functional as F
from torch import nn

In [17]:
from sentimentanalyser.models.regularization import WeightDropout

In [18]:
from sentimentanalyser.utils.dev import print_dims
from sentimentanalyser.utils.models import get_info, get_embedding_vectors

In [19]:
def display_y(y):
    if isinstance(y, (list, tuple)):
        print("------------------")
        for yi in y:
                display_y(yi)
    else:
        print_dims("tensor", y)

### Dropouts

In [20]:
def dropout_mask(x, sz, p):
    return x.new(*sz).bernoulli_(1-p).div_(1-p)

In [21]:
class RNNDropout(nn.Module):
    def __init__(self, dropout=0.5):
        super().__init__()
        self.dropout = dropout
    
    def forward(self, inp):  
        if not self.training or self.dropout == 0.:
            return inp
        bs , seq_len, vocab_size = inp.size()
        mask = dropout_mask(inp.data, (bs, 1, vocab_size), self.dropout)
        return inp * mask

In [22]:
class EmbeddingsWithDropout(nn.Module):
    def __init__(self, embeddings, embeddings_dropout):
        super().__init__()
        self.embeddings = embeddings
        self.embeddings_dropout = embeddings_dropout
        self.padding_idx = self.embeddings.padding_idx
        if self.padding_idx is None:
            self.padding_idx = -1
    
    def forward(self, words, scale=None):
        if self.training and self.embeddings_dropout != 0:
            vocab_length, embedding_size = self.embeddings.weight.size()
            mask = dropout_mask(self.embeddings.weight.data,
                                (vocab_length, 1),
                                self.embeddings_dropout)
            
            masked_embeddings = self.embeddings.weight * mask
            
        else:
            masked_embeddings = self.embeddings.weight
        
        return F.embedding(words, masked_embeddings, self.padding_idx,
                           self.embeddings.max_norm, self.embeddings.norm_type,
                           self.embeddings.scale_grad_by_freq, self.embeddings.sparse)
            

### Encoder model

In [23]:
def to_detach(h):
    "Detaches `h` from its history."
    return h.detach() if type(h) == torch.Tensor else tuple(to_detach(v) for v in h)

In [24]:
class AWDLSTM(nn.Module):
    
    def __init__(self, embdeding_size, hidden_size, num_layers, weight_drop=0.5,
                 hidden_drop=0.2):
        super().__init__()
        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embdeding_size, self.batch_size = embdeding_size, 1
        self.rnns = []
        self.hidden_dropouts = []
        
        for layer in range(num_layers):
            input_size = embdeding_size if layer == 0 else hidden_size
            output_size = hidden_size if layer != num_layers else embdeding_size    
            rnn = nn.LSTM(input_size, output_size, num_layers=1, batch_first=True)
            self.rnns.append(WeightDropout(rnn, weight_drop))
            self.hidden_dropouts.append(RNNDropout(hidden_drop))
        
        self.rnns = nn.ModuleList(self.rnns)
        self.hidden_dropouts = nn.ModuleList(self.hidden_dropouts)
    
    def _one_hidden(self, l):
        "Return one hidden state."
        nh = self.hidden_size if l != self.num_layers - 1 else self.embdeding_size
        return next(self.parameters()).new(1, self.batch_size, nh).zero_()

    def reset(self):
        "Reset the hidden states."
        self.hidden = [(self._one_hidden(l), self._one_hidden(l))
                       for l in range(self.num_layers)]
    
    def forward(self, embedded):
        batch_size, seq_len, vocab_size = embedded.size()
        if batch_size != self.batch_size:
            self.batch_size = batch_size
            self.reset()
        
        new_hidden,raw_outputs,outputs = [],[],[]
        
        raw_output = embedded
        for l, (rnn,hid_dp) in enumerate(zip(self.rnns, self.hidden_dropouts)):
            raw_output, new_h = rnn(raw_output, self.hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.num_layers - 1:
                raw_output = hid_dp(raw_output)
            outputs.append(raw_output) 
        self.hidden = to_detach(new_hidden)
        return raw_outputs, outputs
            
    

In [25]:
class ALTEncAWDLSTM(nn.Module):
    initrange=0.1
    def __init__(self, vocab_size, embedding_size, hidden_size, num_layers, padding_idx,
                 hidden_drop=0.2, input_drop=0.6, embeddings_drop=0.1, weight_drop=0.5):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_size, padding_idx=padding_idx)
        self.embeddings_dropout = EmbeddingsWithDropout(self.embeddings, embeddings_drop)
        
        self.rnns = AWDLSTM(embedding_size, hidden_size, num_layers, weight_drop, hidden_drop)
        
        self.embeddings.weight.data.uniform_(-self.initrange, self.initrange)
        
        self.input_dropout = RNNDropout(input_drop)
    
    def forward(self, texts):
        embedded = self.input_dropout(self.embeddings_dropout(texts))
        raw_outputs, outputs = self.rnns(embedded)
        return raw_outputs, outputs
        

In [26]:
class EncAWDLSTMWithEmbeds(nn.Module):
    initrange=0.1
    def __init__(self, local_vocab, torchtext_vocab, hidden_size, num_layers, padding_idx,
                 hidden_drop=0.2, input_drop=0.6, embeddings_drop=0.1, weight_drop=0.5):
        super().__init__()
        
        embd_vecs = get_embedding_vectors(local_vocab, torchtext_vocab)
        embedding_size = embd_vecs.shape[-1]
        print(padding_idx)
        self.embeddings = nn.Embedding.from_pretrained(embd_vecs,
                                                       freeze=false,
                                                       padding_idx=padding_idx)
        
        self.embeddings_dropout = EmbeddingsWithDropout(self.embeddings, embeddings_drop)
        
        self.rnns = AWDLSTM(embedding_size, hidden_size, num_layers, weight_drop, hidden_drop)
        
        #self.embeddings.weight.data.uniform_(-self.initrange, self.initrange)
        
        self.input_dropout = RNNDropout(input_drop)
    
    def forward(self, texts):
        embedded = self.input_dropout(self.embeddings_dropout(texts))
        raw_outputs, outputs = self.rnns(embedded)
        return raw_outputs, outputs

### Decoder

In [27]:
class LinearDecoder(nn.Module):
    def __init__(self, hidden_sz, output_sz, dropout, tie_encoder=None, bias=True):
        super().__init__()
        self.output_dp = RNNDropout(dropout)
        self.decoder = nn.Linear(hidden_sz, output_sz, bias=bias)
        if bias: self.decoder.bias.data.zero_()
        if tie_encoder: self.decoder.weight = tie_encoder.weight
        else: nn.init.kaiming_uniform_(self.decoder.weight)
    
    def forward(self, input):
        raw_outputs, outputs = input
        output = self.output_dp(outputs[-1])
        #output = self.output_dp(outputs)
        decoded = self.decoder(output)
        return decoded, outputs, raw_outputs

### Language model Classifier

In [28]:
class LMClassifierModel(nn.Module):
    def __init__(self, vocab_sz, emb_sz=300, hidden_sz=300, output_sz=1, dropout=0.2,
                 pad_idx=1, num_layers=2):
        super().__init__()
        self.dps = dps = np.array([0.1, 0.15, 0.25, 0.02, 0.2]) * dropout
        self.encoder = ALTEncAWDLSTM(vocab_sz, emb_sz, hidden_sz, num_layers, pad_idx,
                                      *dps[:-1])
        self.decoder = LinearDecoder(hidden_sz, vocab_sz, dps[-1], tie_encoder=self.encoder.embeddings)
        return
    
    def forward(self, xb):
        output_enc = self.encoder(xb)
        output_dec = self.decoder(output_enc)
        return output_dec

In [29]:
class LMClassifierModelWithEmbeds(nn.Module):
    def __init__(self, local_vocab, torchtext_vocab, hidden_sz=300, output_sz=1, dropout=0.2,
                 pad_idx=1, num_layers=2):
        super().__init__()
        self.dps = dps = np.array([0.1, 0.15, 0.25, 0.02, 0.2]) * dropout
        self.encoder = En(local_vocab, torchtext_vocab, hidden_sz, num_layers, pad_idx,
                                      *dps[:-1])
        self.decoder = LinearDecoder(hidden_sz, vocab_sz, dps[-1], tie_encoder=self.encoder.embeddings)
        return
    
    def forward(self, xb):
        output_enc = self.encoder(xb)
        output_dec = self.decoder(output_enc)
        return output_dec

In [30]:
model = LMClassifierModel(len(vocab))

In [31]:
y_main = model(x1)

In [32]:
display_y(y_main)

------------------
size of tensor is torch.Size([32, 70, 60001])
------------------
size of tensor is torch.Size([32, 70, 300])
size of tensor is torch.Size([32, 70, 300])
------------------
size of tensor is torch.Size([32, 70, 300])
size of tensor is torch.Size([32, 70, 300])


## Loss function and accuracy flat

In [33]:
def cross_entropy_flat(input, target):
    bs, sl = target.size()
    return F.cross_entropy(input.view(bs * sl, -1), target.view(bs * sl))

In [34]:
def accuracy_flat(input, target):
    bs, sl = target.size()
    return accuracy(input.view(bs * sl, -1), target.view(bs * sl))

## Training

In [35]:
from sentimentanalyser.callbacks.core import Callback

In [36]:
class RNNCustomTrainer(Callback):
    def __init__(self, α, β):
        self.α = α
        self.β = β
        
    def after_pred(self):
        self.out, self.raw_out = self.preds[1], self.preds[2]
        self.trainer.preds = self.trainer.preds[0]
    
    def after_loss(self):
        if self.α != 0.:
            self.trainer.loss += self.α * self.out[-1].float().pow(2).mean()
        
        if self.β != 0.:
            h = self.raw_out[-1]
            if len(h) > 1:
                self.trainer.loss += self.β * (h[:,1:] - h[:,:-1]).float().pow(2).mean()
    def begin_epoch(self):
        if hasattr(self.dl.dataset, "batchify"):
            self.dl.dataset.batchify()

In [37]:
from sentimentanalyser.optimizers import adam_opt
from sentimentanalyser.utils.metrics import accuracy
from sentimentanalyser.utils.callbacks import combine_scheds, sched_cos, cos_1cycle_anneal
from sentimentanalyser.utils.callbacks import create_phases

In [38]:
def get_basic(Model, vocab):
    model = Model(len(vocab))
    loss_func = cross_entropy_flat
    opt = adam_opt()(model.parameters())
    return model, loss_func, opt

In [39]:
def get_embeds(Model, local_vocab, torchtext_vocab):
    model = Model(local_vocab, torchtext_vocab)
    loss_func = cross_entropy_flat
    opt = adam_opt()(model.parameters())
    return model, loss_func, opt

In [40]:
sched = combine_scheds([0.3, 0.7], [sched_cos(1e-4, 1e-3), sched_cos(1e-3, 3e-5)])

In [41]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LRFind, CudaCallback, GradientClipping
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamSchedulerCustom
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import RecorderCustom
from sentimentanalyser.training.trainer import Trainer

In [42]:
cbs = [partial(AvgStatsCallback, [accuracy_flat]),
       partial(ParamSchedulerCustom,'lr', [sched]),
       partial(GradientClipping, clip=0.1),
       ProgressCallback,
       CudaCallback,
       partial(RNNCustomTrainer, α=2., β=1.),
       RecorderCustom]

In [43]:
trainer = Trainer(wiki_data, *get_embeds(LMClassifierModel, vocab, ft_eng), cb_funcs=cbs)

TypeError: '<' not supported between instances of 'int' and 'list'

In [None]:
trainer.fit()