In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from sentimentanalyser.utils.data  import Path, pad_collate, grandparent_splitter
from sentimentanalyser.utils.data  import parent_labeler, listify
from sentimentanalyser.data.text   import TextList, ItemList, SplitData
from sentimentanalyser.utils.files import pickle_dump, pickle_load

In [None]:
from sentimentanalyser.preprocessing.processor import TokenizerProcessor, NuemericalizeProcessor

In [None]:
from sentimentanalyser.utils.data import read_wiki

In [None]:
from functools import partial

In [None]:
import os

# Location of the IMDB dataset. Set the IMDB_PATH environment variable to use a
# different directory; the original machine-specific path is only the fallback.
path_imdb = Path(os.environ.get(
    "IMDB_PATH", "/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb"))

In [None]:
import os

# Location of the WikiText-103 dataset. Set the WIKITEXT103_PATH environment
# variable to use a different directory; the original machine-specific path is
# only the fallback.
path_wiki = Path(os.environ.get(
    "WIKITEXT103_PATH",
    "/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/wikitext-103"))

In [None]:
# Input processors; they are passed together as `proc_x` when the data is
# labelled further down.
proc_tok = TokenizerProcessor()
proc_num = NuemericalizeProcessor()

In [None]:
# One TextList per split, built from what `read_wiki` returns for the
# train/valid text files under path_wiki.
train = TextList(read_wiki(path_wiki/'train.txt'), path_wiki)
valid = TextList(read_wiki(path_wiki/'valid.txt'), path_wiki)

In [None]:
# Number of items in each split.
len(train), len(valid)

In [None]:
# Bundle the two lists as the train/valid split.
sd_wiki = SplitData(train, valid)

In [None]:
# Every item gets the constant placeholder label 0; proc_tok and proc_num are
# supplied as the input processors.
ll_wiki = sd_wiki.label_by_func(lambda x:0, proc_x=[proc_tok, proc_num])

In [None]:
# Save the labelled data so it can be reloaded without repeating the steps above.
# NOTE(review): the path is relative to the working directory; presumably
# dumps/variable/ must already exist — confirm against pickle_dump.
pickle_dump(ll_wiki, 'dumps/variable/ll_wiki.pickle')

In [None]:
# Reload the saved data. Unpickling can execute arbitrary code, so only load
# files produced by this notebook.
ll_wiki = pickle_load('dumps/variable/ll_wiki.pickle')

In [None]:
# bs / bptt are presumably the batch size and the sequence length per batch
# (the 32*70 reshapes further down match them) — confirm in lm_databunchify.
bs, bptt = 32, 70
wiki_data = ll_wiki.lm_databunchify(bs, bptt)

In [None]:
# Vocabulary held by the last input processor of the training set; its length
# is what the model is sized with later on.
vocab = ll_wiki.train.proc_x[-1].vocab
len(vocab)

In [None]:
# Take the first training batch (x1 inputs, y1 targets) for the checks below.
x1,y1 = next(iter(wiki_data.train_dl))

In [None]:
import torch
import torch.nn.functional as F
from torch import nn

In [None]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LR_Find, CudaCallback, GradientClipping
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamScheduler
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import Recorder
from sentimentanalyser.training.trainer import Trainer

In [None]:
from sentimentanalyser.utils.dev import get_info, print_dims
from sentimentanalyser.utils.training import get_embedding_vectors
from sentimentanalyser.utils.metrics import accuracy
from tqdm.auto import tqdm

# Model

## Weight Dropout

In [None]:
class WeightDropout(nn.Module):
    """Apply dropout to the hidden-to-hidden weights of a wrapped recurrent module.

    For every ``weight_hh_l*`` matrix of ``module`` a trainable copy named
    ``<name>_raw`` is registered on this wrapper. Before each forward pass a
    dropped-out version of that copy is installed on the wrapped module, so the
    dropout mask acts on the weights themselves.

    The wrapped module no longer owns the ``weight_hh_l*`` parameters: they
    appear in ``parameters()`` / ``state_dict()`` only under their ``_raw``
    names on the wrapper.

    Args:
        module: Recurrent module exposing ``num_layers``, ``bidirectional``,
            ``flatten_parameters()`` and ``weight_hh_l{i}`` attributes
            (``nn.LSTM`` is what this file passes in).
        dropout: Probability of zeroing an element of each hidden-to-hidden
            weight matrix while the wrapper is in training mode.
    """
    def __init__(self, module, dropout=0.5):
        super().__init__()
        self.module, self.dropout = module, dropout

        self.layer_names = self.get_layer_names()
        for layer_name in self.layer_names:
            weight = getattr(self.module, layer_name)
            # Unregister the original parameter: otherwise it is returned by
            # `parameters()` next to its `_raw` copy and can end up as a stale
            # entry in an optimizer.
            delattr(self.module, layer_name)
            self.register_parameter(layer_name+'_raw', nn.Parameter(weight.data))
            # Leave a plain (non-parameter) copy behind so the wrapped module
            # still has the attribute before the first forward pass.
            setattr(self.module, layer_name, weight.data.clone())
        return

    def _setweights(self):
        """Install freshly dropped-out hidden-to-hidden weights on the wrapped module."""
        for layer_name in self.layer_names:
            raw_w = getattr(self, layer_name+'_raw')
            dropped_w = F.dropout(raw_w, self.dropout, training=self.training)
            if isinstance(dropped_w, nn.Parameter):
                # When dropout is a no-op it can hand back the parameter
                # itself; assigning that would re-register it on the wrapped
                # module, so pass a plain tensor that still tracks gradients.
                dropped_w = raw_w.clone()
            # Go through setattr (not `_parameters`) so the wrapped module's own
            # attribute hook sees the new tensor.
            setattr(self.module, layer_name, dropped_w)
        return

    def get_layer_names(self):
        """Return the hidden-to-hidden weight names for every layer and direction."""
        names = [f"weight_hh_l{i}" for i in range(self.module.num_layers)]
        if self.module.bidirectional:
            names = names + [name+'_reverse' for name in names]
        return names

    def forward(self, *args, **kwargs):
        """Refresh the dropped weights, then run the wrapped module on the inputs."""
        self._setweights()
        self.module.flatten_parameters()
        return self.module.forward(*args, **kwargs)

In [None]:
class AWDModel(nn.Module):
    """Encoder: token embedding, dropout, then a weight-dropped LSTM.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_sz: Embedding dimension (also the LSTM input size).
        hidden_sz: LSTM hidden size per direction.
        dropout: Probability used for the embedding dropout, the LSTM's
            inter-layer dropout and (through ``WeightDropout``'s own default
            being left untouched) not for the weight dropout itself.
        pad_idx: Index passed to the embedding as ``padding_idx``.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
    """
    def __init__(self, vocab_size, emb_sz=300, hidden_sz=256, dropout=0.5,
                 pad_idx=1, num_layers=2, bidirectional=True):
        super().__init__()

        self.pad_idx = pad_idx

        # Number of LSTM directions: 2 when bidirectional, otherwise 1.
        self.bidir = 2 if bidirectional else 1

        self.embeddings = nn.Embedding(vocab_size, emb_sz,
                                       padding_idx=pad_idx)

        self.dropout = nn.Dropout(dropout)

        self.rnn = WeightDropout(nn.LSTM(emb_sz,
                                         hidden_sz,
                                         batch_first=True,
                                         dropout=dropout,
                                         num_layers=num_layers,
                                         bidirectional=bidirectional))
        return

    def forward(self, xb):
        """Encode a batch of token ids.

        Args:
            xb: Batch of token ids; the LSTM is batch-first, so this is laid
                out as (batch, sequence).

        Returns:
            Tuple ``(lstm_out, hidden_st)``: the padded per-timestep LSTM
            outputs, always ``xb.size(1)`` timesteps long, and the LSTM's
            final hidden state.
        """
        # get_info presumably returns the per-sequence lengths and a padding
        # mask; only the lengths are needed here — confirm in utils.dev.
        seq_lens, _ = get_info(xb)

        embedded = self.dropout(self.embeddings(xb))

        # NOTE: with the default enforce_sorted=True the batch must be sorted
        # by decreasing length.
        packed = nn.utils.rnn.pack_padded_sequence(embedded, seq_lens,
                                                   batch_first=True)

        packed_out, (hidden_st, _) = self.rnn(packed)

        # Pad back to the full input width. Without total_length the output is
        # trimmed to the longest sequence in the batch and no longer lines up
        # with targets shaped like xb.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(packed_out,
                                                       batch_first=True,
                                                       total_length=xb.size(1))
        return lstm_out, hidden_st

In [None]:
class LinearDecoder(nn.Module):
    """Linear projection applied to the encoder's per-timestep outputs.

    Args:
        hidden_sz: Size of the incoming feature dimension.
        output_sz: Size of the projected feature dimension.
        tie_encoder: Optional module whose ``weight`` is shared with the
            projection (weight tying). When omitted the projection weight is
            Kaiming-uniform initialised.
        bias: Whether the projection has a bias term; it is zero-initialised.
    """
    def __init__(self, hidden_sz, output_sz, tie_encoder=None, bias=True):
        super().__init__()
        self.decoder = nn.Linear(hidden_sz, output_sz, bias=bias)
        if bias: self.decoder.bias.data.zero_()
        if tie_encoder: self.decoder.weight = tie_encoder.weight
        else: nn.init.kaiming_uniform_(self.decoder.weight)

    def forward(self, input):
        """Project the encoder outputs.

        Args:
            input: Pair ``(outputs, hidden_st)`` as returned by the encoder;
                only ``outputs`` is used.

        Returns:
            The projected outputs, with the last dimension removed when
            ``output_sz`` is 1.
        """
        outputs, _ = input
        # Squeeze only the feature axis: a bare squeeze() would also drop the
        # batch or sequence axis whenever it happens to have size 1.
        return self.decoder(outputs).squeeze(-1)

In [None]:
class ClassifierModel(nn.Module):
    """AWD encoder followed by a linear decoder over the vocabulary.

    Args:
        vocab_sz: Vocabulary size; sizes both the embedding table and the
            decoder output.
        emb_sz: Embedding dimension.
        hidden_sz: LSTM hidden size per direction.
        output_sz: Accepted but not used — the decoder always projects to
            ``vocab_sz``.
        dropout: Dropout probability forwarded to the encoder.
        pad_idx: Padding token index forwarded to the encoder.
        num_layers: Number of LSTM layers.
        bidirectional: Whether the encoder LSTM is bidirectional.
    """
    def __init__(self, vocab_sz, emb_sz=300, hidden_sz=256, output_sz=1, dropout=0.5,
                 pad_idx=1, num_layers=2, bidirectional=True):
        super().__init__()
        # A bidirectional LSTM concatenates both directions, doubling the
        # feature size the decoder receives.
        n_directions = 2 if bidirectional else 1
        self.encoder = AWDModel(vocab_size=vocab_sz,
                                emb_sz=emb_sz,
                                hidden_sz=hidden_sz,
                                dropout=dropout,
                                pad_idx=pad_idx,
                                num_layers=num_layers,
                                bidirectional=bidirectional)
        self.decoder = LinearDecoder(n_directions * hidden_sz, vocab_sz)

    def forward(self, xb):
        """Run the batch through the encoder, then the decoder."""
        return self.decoder(self.encoder(xb))

In [None]:
# Instantiate with the default hyper-parameters for a quick forward-pass check.
model = ClassifierModel(len(vocab))

In [None]:
# Forward pass on the sample batch.
y = model(x1)

In [None]:
# Shape of the target batch.
y1.shape

In [None]:
# Shape of the model output for the same batch.
y.shape

# Training

In [None]:
def cross_entropy_flat(input, target):
    """Cross-entropy over every position of a batch of sequences.

    Args:
        input: Scores whose elements can be laid out as
            ``(bs * sl, n_classes)``.
        target: 2-D tensor of class indices with shape ``(bs, sl)``.

    Returns:
        The value of ``F.cross_entropy`` on the flattened scores and targets.
    """
    bs, sl = target.size()
    n_positions = bs * sl
    # reshape (unlike view) also accepts non-contiguous tensors.
    return F.cross_entropy(input.reshape(n_positions, -1),
                           target.reshape(n_positions))

In [None]:
# Flatten the model output to one row per token position, using the configured
# batch size and sequence length rather than repeating the literals.
b = y.view(bs * bptt, -1)

In [None]:
# Flatten the targets to one entry per token position, using the configured
# batch size and sequence length rather than repeating the literals.
a = y1.view(bs * bptt)

In [None]:
# Shapes of the flattened targets and flattened outputs.
a.shape, b.shape

In [None]:
# Spot-check a single flattened target entry.
a[326]

In [None]:
# Target batch shape again, before computing the loss on it.
y1.shape

In [None]:
# Loss of the untrained model on the sample batch.
cross_entropy_flat(y, y1)

In [None]:
def accuracy_flat(input, target):
    """Apply ``accuracy`` to every position of a batch of sequences.

    Args:
        input: Scores whose elements can be laid out as
            ``(bs * sl, n_classes)``.
        target: 2-D tensor of class indices with shape ``(bs, sl)``.

    Returns:
        Whatever ``accuracy`` returns for the flattened scores and targets.
    """
    bs, sl = target.size()
    n_positions = bs * sl
    # reshape (unlike view) also accepts non-contiguous tensors.
    return accuracy(input.reshape(n_positions, -1),
                    target.reshape(n_positions))

In [None]:
def get_params(model):
    """Split a model's parameters into embedding, weight and bias groups.

    Classification is by parameter name (dot-separated, as yielded by
    ``named_parameters``):

    * any path component starting with ``embedding`` -> embedding group
      (so both ``embedding.weight`` and ``embeddings.weight`` match);
    * otherwise a final component starting with ``bias`` -> bias group
      (covers ``bias`` as well as ``bias_ih_l0`` style names);
    * everything else -> weight group, so that no parameter is silently
      left out of the result.

    Args:
        model: Object exposing ``named_parameters()``.

    Returns:
        Tuple ``(emb, wgts, biases)`` of lists, each in iteration order.
    """
    emb, wgts, biases = [], [], []
    for name, param in model.named_parameters():
        parts = name.split('.')
        if any(part.startswith('embedding') for part in parts):
            emb.append(param)
        elif parts[-1].startswith('bias'):
            biases.append(param)
        else:
            wgts.append(param)
    return emb, wgts, biases

In [None]:
def pg_dicts(pgs):
    """Wrap each parameter group in a ``{'params': group}`` dict.

    Args:
        pgs: Iterable of parameter groups.

    Returns:
        List with one single-key dict per group, in the same order.
    """
    group_dicts = []
    for group in pgs:
        group_dicts.append({'params': group})
    return group_dicts

In [None]:
def get_basic(Model, vocab):
    """Build the model, loss function and optimizer used for training.

    Args:
        Model: Callable invoked as ``Model(len(vocab))`` to create the model.
        vocab: Sized vocabulary; only its length is used.

    Returns:
        Tuple ``(model, loss_func, opt)``: the new model, ``cross_entropy_flat``
        and an ``AdamW`` optimizer over the model's parameters with lr=1e-3.
    """
    net = Model(len(vocab))
    optimizer = torch.optim.AdamW(net.parameters(), lr=1e-3)
    return net, cross_entropy_flat, optimizer

In [None]:
# Two cosine segments, 1e-4 -> 1e-3 followed by 1e-3 -> 3e-5, combined with
# weights 0.3 / 0.7 (presumably the share of training each one covers —
# confirm in combine_scheds).
sched = combine_scheds([0.3, 0.7], [sched_cos(1e-4, 1e-3), sched_cos(1e-3, 3e-5)])

In [None]:
# Callback factories handed to the Trainer as `cb_funcs`; `partial` pre-binds
# the constructor arguments (metric list, scheduled 'lr', clip value).
cbs = [partial(AvgStatsCallback, [accuracy_flat]),
       partial(ParamScheduler,'lr', [sched]),
       partial(GradientClipping, clip=0.1),
       ProgressCallback,
       CudaCallback,
       Recorder]

In [None]:
# get_basic returns (model, loss_func, opt), unpacked here as the Trainer's
# positional arguments after the data.
trainer = Trainer(wiki_data, *get_basic(ClassifierModel, vocab), cb_funcs=cbs)

In [None]:
# Train for a single epoch.
trainer.fit(epochs=1)