In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from sentimentanalyser.utils.data import Path, pad_collate, grandparent_splitter
from sentimentanalyser.utils.data import parent_labeler
from sentimentanalyser.data.text import TextList, SplitData
from sentimentanalyser.utils.files import pickle_dump, pickle_load



In [3]:
from sentimentanalyser.preprocessing.processor import TokenizerProcessor
from sentimentanalyser.preprocessing.processor import NuemericalizeProcessor
from sentimentanalyser.utils.data import read_wiki

In [4]:
from functools import partial

In [5]:
# Absolute, machine-specific location of the IMDB dataset.
# NOTE(review): not referenced again in the visible part of this notebook.
path_imdb = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb")

In [6]:
# Absolute, machine-specific location of the WikiText-103 dataset; in the
# visible notebook only the commented-out preprocessing cell reads from it.
path_wiki = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/wikitext-103")

In [7]:
# Processors handed over as `proc_x` (tokenizer first, numericalizer second)
# in the commented-out preprocessing cell that follows.
proc_tok = TokenizerProcessor()
proc_num = NuemericalizeProcessor()

In [8]:
# One-off preprocessing, kept commented out for reference: it reads the
# WikiText train/valid files, labels them with the tokenizer and numericalizer
# processors, and pickles the result to the file that the next cell loads.
# train = TextList(read_wiki(path_wiki/'train.txt'), path_wiki)
# valid = TextList(read_wiki(path_wiki/'valid.txt'), path_wiki)

# len(train), len(valid)

# sd_wiki = SplitData(train, valid)

# lm_wiki = sd_wiki.label_by_func(lambda x:0, proc_x=[proc_tok, proc_num])

# pickle_dump(lm_wiki, 'dumps/variable/ll_wiki.pickle')

In [9]:
# Load the preprocessed data from the same relative path that the
# commented-out `pickle_dump` call writes to; the file must already exist.
# NOTE(review): presumably `pickle_load` wraps `pickle` -- only load trusted files.
lm_wiki = pickle_load('dumps/variable/ll_wiki.pickle')

In [10]:
# The two sizes handed to `lm_databunchify`; their product (32 * 70 = 2240)
# is the row count of the flattened model outputs recorded further down.
bs = 32
bptt = 70
wiki_data = lm_wiki.lm_databunchify(bs, bptt)

In [11]:
# Vocabulary held by the last x-processor of the training set; the recorded
# cell output is its size.
vocab = lm_wiki.train.proc_x[-1].vocab
len(vocab)

60001

In [12]:
# Pull one batch from the training dataloader; `x1` is fed to the models below
# as a smoke test (presumably `y1` is the matching target batch).
x1,y1 = next(iter(wiki_data.train_dl))

## Model

In [13]:
import torch
import torch.nn.functional as F
from torch import nn

In [14]:
from sentimentanalyser.models.regularization import WeightDropout

In [15]:
from sentimentanalyser.utils.dev import print_dims
from sentimentanalyser.utils.models import get_info

### Encoder model

In [16]:
class EncoderAWDModel(nn.Module):
    """LSTM encoder: embedding -> dropout -> weight-dropped LSTM.

    Args:
        vocab_size: number of rows in the embedding table.
        emb_sz: embedding width, which is also the LSTM input size.
        hidden_sz: LSTM hidden size.
        dropout: probability used both for the dropout applied to the
            embeddings and for the LSTM's own ``dropout`` argument.
        pad_idx: index passed to ``nn.Embedding`` as ``padding_idx``.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, emb_sz=300, hidden_sz=256, dropout=0.5,
                 pad_idx=1, num_layers=2):
        super().__init__()

        self.pad_idx = pad_idx

        self.embeddings = nn.Embedding(vocab_size, emb_sz,
                                       padding_idx=pad_idx)

        self.dropout = nn.Dropout(dropout)

        self.rnn = WeightDropout(nn.LSTM(emb_sz,
                                         hidden_sz,
                                         batch_first=True,
                                         dropout=dropout,
                                         num_layers=num_layers,
                                         bidirectional=False))

    def forward(self, xb):
        """Encode a batch of token ids laid out batch-first.

        Returns:
            ``(lstm_out, hidden_st)``: the padded per-step LSTM outputs, with
            the same time length as ``xb``, and the final hidden state
            returned by the wrapped LSTM.
        """
        # Project helper; only its first return value (used here as the
        # per-row sequence lengths) is needed.
        seq_lens, _ = get_info(xb)

        embedded = self.dropout(self.embeddings(xb))

        packed = nn.utils.rnn.pack_padded_sequence(embedded, seq_lens,
                                                   batch_first=True)

        packed_out, (hidden_st, _) = self.rnn(packed)

        # Without `total_length` the output is padded only up to the longest
        # sequence in the batch, so its time axis could be shorter than
        # `xb`'s and no longer line up with a target of the same shape as `xb`.
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=xb.size(1))
        return lstm_out, hidden_st

### Decoder

In [17]:
class LinearDecoder(nn.Module):
    """Linear projection applied to the outputs of an encoder.

    Args:
        hidden_sz: size of the last axis of the encoder outputs.
        output_sz: size of the projected last axis.
        tie_encoder: optional module whose ``weight`` parameter is shared with
            the linear layer; when omitted the weight is Kaiming-uniform
            initialised instead.
        bias: whether the linear layer has a bias (zero-initialised if so).
    """

    def __init__(self, hidden_sz, output_sz, tie_encoder=None, bias=True):
        super().__init__()
        self.decoder = nn.Linear(hidden_sz, output_sz, bias=bias)
        if bias:
            self.decoder.bias.data.zero_()
        if tie_encoder:
            self.decoder.weight = tie_encoder.weight
        else:
            nn.init.kaiming_uniform_(self.decoder.weight)

    def forward(self, input):
        """Project the first element of ``input``, an ``(outputs, hidden)`` pair.

        Only the last axis is squeezed, and only when ``output_sz == 1``.
        A bare ``squeeze()`` would also drop a batch or time axis of size 1
        and silently change the rank of the result.
        """
        outputs, _ = input
        return self.decoder(outputs).squeeze(-1)

### Language model classifier

In [18]:
class LMClassifierModel(nn.Module):
    """Language model made of an ``EncoderAWDModel`` and a ``LinearDecoder``.

    The decoder always projects from ``hidden_sz`` to ``vocab_sz``.
    ``output_sz`` is accepted but not used anywhere in this class.
    """

    def __init__(self, vocab_sz, emb_sz=300, hidden_sz=256, output_sz=1, dropout=0.5,
                 pad_idx=1, num_layers=2):
        super().__init__()
        self.encoder = EncoderAWDModel(vocab_size=vocab_sz,
                                       emb_sz=emb_sz,
                                       hidden_sz=hidden_sz,
                                       dropout=dropout,
                                       pad_idx=pad_idx,
                                       num_layers=num_layers)
        self.decoder = LinearDecoder(hidden_sz=hidden_sz, output_sz=vocab_sz)

    def forward(self, xb):
        """Run the encoder on ``xb`` and decode whatever it returns."""
        return self.decoder(self.encoder(xb))

In [19]:
# Only the vocabulary size is supplied; every other hyper-parameter keeps its default.
model = LMClassifierModel(len(vocab))

In [20]:
# Forward pass on the sample batch as a smoke test.
y = model(x1)

In [21]:
# Collapse every leading axis so that each row holds one score per vocabulary entry.
y.view(-1, len(vocab)).shape

torch.Size([2240, 60001])

### Alternative model (from `exp.nb_12a`)

In [22]:
from exp.nb_12a import AWD_LSTM, get_language_model, RNNTrainer
from exp.nb_12a import LinearDecoder as LinD

In [None]:
from sentimentanalyser.preprocessing.tokens import TOKENS

In [None]:
import numpy as np

In [None]:
# Five base dropout rates, scaled down by 0.2 and later unpacked positionally
# into `get_language_model`; which slot controls which dropout is defined in
# `exp.nb_12a` and not visible here.
dps = np.array([0.1, 0.15, 0.25, 0.02, 0.2]) * 0.2
# Position of the padding token inside the vocabulary.
tok_pad = vocab.index(TOKENS.PAD)

In [None]:
# Alternative model built by `exp.nb_12a.get_language_model`.
# NOTE(review): 300, 356 and 2 are presumably the embedding size, hidden size
# and layer count -- confirm against that function's signature.
alt_model = get_language_model(len(vocab), 300, 356, 2, tok_pad, *dps)

In [None]:
# Rebinds `y`: the alternative model returns an indexable result whose first
# element is inspected in the next cell.
y = alt_model(x1)

In [None]:
# One-element tuple, so the shape is displayed wrapped in parentheses.
(y[0].shape,)

(torch.Size([2240, 60001]),)

## Flattened loss function and accuracy

In [None]:
def cross_entropy_flat(input, target):
    """Cross-entropy over every position of a two-dimensional ``target``.

    ``target`` is flattened to one dimension and ``input`` is viewed as one
    row of scores per target element before calling ``F.cross_entropy``.
    """
    n_rows, n_cols = target.size()
    n_items = n_rows * n_cols
    flat_scores = input.view(n_items, -1)
    flat_target = target.view(n_items)
    return F.cross_entropy(flat_scores, flat_target)

In [None]:
def accuracy_flat(input, target):
    """Apply ``accuracy`` to a two-dimensional ``target`` after flattening.

    ``target`` is flattened to one dimension and ``input`` is viewed as one
    row per target element before both are handed to ``accuracy``.
    """
    n_rows, n_cols = target.size()
    n_items = n_rows * n_cols
    flat_scores = input.view(n_items, -1)
    flat_target = target.view(n_items)
    return accuracy(flat_scores, flat_target)

## Training

In [None]:
from sentimentanalyser.optimizers import adam_opt
from sentimentanalyser.utils.metrics import accuracy
from sentimentanalyser.utils.callbacks import combine_scheds, sched_cos, cos_1cycle_anneal
from sentimentanalyser.utils.callbacks import create_phases

In [None]:
def get_basic(Model, vocab):
    """Return ``(model, loss_func, opt)`` for ``Model`` built on ``vocab``.

    The model is ``Model(len(vocab))``, the loss is ``cross_entropy_flat`` and
    the optimizer is ``adam_opt()`` applied to the model's parameters.
    """
    net = Model(len(vocab))
    optimizer = adam_opt()(net.parameters())
    return net, cross_entropy_flat, optimizer

In [None]:
def get_alt():
    """Return ``(model, loss_func, opt)`` for the ``get_language_model`` model.

    Reads the notebook-level ``vocab``, ``tok_pad`` and ``dps`` when called.
    The loss is ``cross_entropy_flat`` and the optimizer is ``adam_opt()``
    applied to the model's parameters.
    """
    net = get_language_model(len(vocab), 300, 356, 2, tok_pad, *dps)
    optimizer = adam_opt()(net.parameters())
    return net, cross_entropy_flat, optimizer

In [None]:
# Learning-rate schedule in two cosine phases.
# NOTE(review): read from the argument pattern as 30% of training going
# 1e-4 -> 1e-3, then 70% going 1e-3 -> 3e-5; confirm against
# `combine_scheds` / `sched_cos`.
sched = combine_scheds([0.3, 0.7], [sched_cos(1e-4, 1e-3), sched_cos(1e-3, 3e-5)])

In [None]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LRFind, CudaCallback, GradientClipping
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamSchedulerCustom
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import RecorderCustom
from sentimentanalyser.training.trainer import Trainer

In [None]:
# Callback factories given to the trainer of the hand-written model as `cb_funcs`.
cbs1 = [partial(AvgStatsCallback, [accuracy_flat]),  # metric shows up as *_accuracy_flat in the training log
       partial(ParamSchedulerCustom,'lr', [sched]),  # presumably drives 'lr' with `sched`
       partial(GradientClipping, clip=0.1),
       ProgressCallback,
       CudaCallback,
       RecorderCustom]

In [None]:
# Callback factories for the alternative model's trainer (see the
# commented-out `trainer2` cell). The metric is wrapped in a list exactly as
# in `cbs1`, the form the recorded training run shows `AvgStatsCallback`
# accepting; a bare function was passed here before.
cbs2 = [partial(AvgStatsCallback, [accuracy_flat]),
       CudaCallback, RecorderCustom,
       partial(ParamSchedulerCustom,'lr', [sched]),
       partial(GradientClipping, clip=0.1),
       partial(RNNTrainer, α=2., β=1.),
       ProgressCallback]

In [None]:
# Trainer for the hand-written model: `get_basic` supplies the model, loss
# function and optimizer positionally, and `cbs1` provides the callbacks.
trainer1 = Trainer(wiki_data, *get_basic(LMClassifierModel, vocab), cb_funcs=cbs1)

In [None]:
# Called with `fit`'s defaults; the recorded run shows two epochs at roughly
# an hour each.
trainer1.fit()

epoch,train_loss,train_accuracy_flat,valid_loss,valid_accuracy_flat,time
0,5.247426,0.229857,4.661757,0.27718,1:00:52
1,4.624358,0.270489,4.383159,0.294448,1:00:26


In [None]:
#trainer2 = Trainer(wiki_data, *get_alt(), cb_funcs=cbs2)

In [None]:
#trainer2.fit()