In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from sentimentanalyser.utils.data  import Path, pad_collate, grandparent_splitter
from sentimentanalyser.utils.data  import parent_labeler, listify
from sentimentanalyser.data.text   import TextList, ItemList, SplitData
from sentimentanalyser.utils.files import pickle_dump, pickle_load

In [3]:
from sentimentanalyser.preprocessing.processor import TokenizerProcessor, NuemericalizeProcessor

In [4]:
from sentimentanalyser.utils.data import read_wiki

In [5]:
from functools import partial

In [6]:
path_imdb = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb")

In [7]:
path_wiki = Path("/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/wikitext-103")

In [8]:
proc_tok = TokenizerProcessor()
proc_num = NuemericalizeProcessor()

In [8]:
train = TextList(read_wiki(path_wiki/'train.txt'), path_wiki)
valid = TextList(read_wiki(path_wiki/'valid.txt'), path_wiki)

In [9]:
len(train), len(valid)

(28476, 60)

In [10]:
sd_wiki = SplitData(train, valid)

In [12]:
ll_wiki = sd_wiki.label_by_func(lambda x:0, proc_x=[proc_tok, proc_num])

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [13]:
pickle_dump(ll_wiki, 'dumps/variable/ll_wiki.pickle')

In [9]:
ll_wiki = pickle_load('dumps/variable/ll_wiki.pickle')

In [10]:
bs, bptt = 32, 70
wiki_data = ll_wiki.lm_databunchify(bs, bptt)

In [11]:
vocab = ll_wiki.train.proc_x[-1].vocab
len(vocab)

60001

In [23]:
x1,y1 = next(iter(wiki_data.train_dl))

In [12]:
import torch
import torch.nn.functional as F
from torch import nn

In [13]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LR_Find, CudaCallback, GradientClipping
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamScheduler
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import Recorder
from sentimentanalyser.training.trainer import Trainer

In [14]:
from sentimentanalyser.utils.dev import get_info, print_dims
from sentimentanalyser.utils.training import get_embedding_vectors
from sentimentanalyser.utils.metrics import accuracy
from tqdm.auto import tqdm

# Model

## Weight Dropout

In [15]:
class WeightDropout(nn.Module):
    def __init__(self, module, dropout=0.5):
        super().__init__()
        self.module, self.dropout = module, dropout
        
        self.layer_names = self.get_layer_names()
        for layer_name in self.layer_names:
            weight = getattr(self.module, layer_name)
            self.register_parameter(layer_name+'_raw', nn.Parameter(weight.data))
        return
    
    def _setweights(self):
        for layer_name in self.layer_names:
            raw_w = getattr(self, layer_name+'_raw')
            self.module._parameters[layer_name] = F.dropout(raw_w, self.dropout,
                                                            training=self.training)
        return
    
    def get_layer_names(self):
        names = [f"weight_hh_l{i}" for i in range(self.module.num_layers)]
        if self.module.bidirectional:
            names = names + [name+'_reverse' for name in names]
        return names
    
    def forward(self, *args):
        self._setweights()
        self.module.flatten_parameters()
        return self.module.forward(*args)

In [27]:
class AWDModel(nn.Module):
    def __init__(self, vocab_size, emb_sz=300, hidden_sz=256, dropout=0.5,
                 pad_idx=1, num_layers=2, bidirectional=True):
        super().__init__()
        
        self.pad_idx = pad_idx
        
        self.bidir = 2 if bidirectional else 1
        
        self.embeddings = nn.Embedding(vocab_size, emb_sz,
                                       padding_idx=pad_idx)
        
        self.dropout = nn.Dropout(dropout)
        
        self. rnn    = WeightDropout(nn.LSTM(emb_sz,
                                             hidden_sz,
                                             batch_first=True,
                                             dropout=dropout,
                                             num_layers=num_layers,
                                             bidirectional=bidirectional))
        return
    
    def forward(self, xb):
        seq_lens, mask = get_info(xb)
        
        embedded = self.dropout(self.embeddings(xb))
        
        packed = nn.utils.rnn.pack_padded_sequence(embedded, seq_lens,
                                                   batch_first=True)
        
        packed_out, (hidden_st, cell_st) = self.rnn(packed)
        
        lstm_out, lens = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)
        return lstm_out, hidden_st

In [45]:
class LinearDecoder(nn.Module):
    def __init__(self, hidden_sz, output_sz, tie_encoder=None, bias=True):
        super().__init__()
        self.decoder = nn.Linear(hidden_sz, output_sz, bias=bias)
        if bias: self.decoder.bias.data.zero_()
        if tie_encoder: self.decoder.weight = tie_encoder.weight
        else: nn.init.kaiming_uniform_(self.decoder.weight)
    
    def forward(self, input):
        outputs, hidden_st = input
        print_dims('outputs',outputs)

In [47]:
class ClassifierModel(nn.Module):
    def __init__(self, vocab_sz, emb_sz=300, hidden_sz=256, output_sz=1, dropout=0.5,
                 pad_idx=1, num_layers=2, bidirectional=True):
        super().__init__()
        self.encoder = AWDModel(vocab_sz, emb_sz, hidden_sz, dropout, pad_idx,
                                num_layers, bidirectional)
        bidir = 2 if self.bidirectional else 1
        self.decoder = LinearDecoder(bidir*hidden_sz, output_sz,
                                     tie_encoder=self.encoder.embeddings)
        return
    
    def forward(self, xb):
        output = self.encoder(xb)
        output = self.decoder(output)
        return output

In [43]:
model = ClassifierModel(len(vocab))

In [44]:
model(x1)

size of outputs is torch.Size([32, 70, 512])


In [46]:
y1.shape

torch.Size([32, 70])