In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from sentimentanalyser.utils.data import Path, pad_collate, grandparent_splitter
from sentimentanalyser.utils.data import parent_labeler
from sentimentanalyser.data.text import TextList, ItemList, SplitData
from sentimentanalyser.utils.files import pickle_dump, pickle_load

In [3]:
from sentimentanalyser.preprocessing.processor import TokenizerProcessor
from sentimentanalyser.preprocessing.processor import NuemericalizeProcessor
from sentimentanalyser.preprocessing.processor import CategoryProcessor

In [4]:
import os
from functools import partial

In [5]:
# Root folder of the IMDB dataset. Set the IMDB_DATA_DIR environment variable to
# point at another location; the original author's local path is the fallback,
# so the notebook behaves as before when the variable is unset.
path_imdb = Path(os.environ.get("IMDB_DATA_DIR", "/home/anukoolpurohit/Documents/AnukoolPurohit/Datasets/imdb"))

In [6]:
# Processors used by the labelling step further down: proc_tok and proc_num are
# passed as the input processors (proc_x), proc_cat as the target processor (proc_y).
proc_tok = TokenizerProcessor()
proc_num = NuemericalizeProcessor()
proc_cat = CategoryProcessor()

In [7]:
# Build the item list from the 'train' and 'test' folders under path_imdb.
tl_imdb = TextList.from_files(path=path_imdb, folders=['train','test'])
# Split with grandparent_splitter(valid_name='test') — presumably items whose
# grandparent folder is 'test' form the validation set; confirm in utils.data.
sd_imdb = tl_imdb.split_by_func(partial(grandparent_splitter, valid_name='test'))
# Label with parent_labeler and run the processors: tokenize + numericalize on x,
# category processing on y.
ll_imdb = sd_imdb.label_by_func(parent_labeler, proc_x=[proc_tok, proc_num], proc_y=proc_cat)

HBox(children=(IntProgress(value=0, max=13), HTML(value='')))




HBox(children=(IntProgress(value=0, max=13), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [8]:
# NOTE(review): 64 is presumably the batch size — confirm against clas_databunchify's signature.
imdb_data = ll_imdb.clas_databunchify(64)

In [44]:
from sentimentanalyser.utils.callbacks import sched_cos, combine_scheds
from sentimentanalyser.callbacks.training import LR_Find, CudaCallback, GradientClipping
from sentimentanalyser.callbacks.progress import ProgressCallback
from sentimentanalyser.callbacks.scheduler import ParamScheduler, ParamSchedulerCustom
from sentimentanalyser.callbacks.stats import AvgStatsCallback
from sentimentanalyser.callbacks.recorder import Recorder, RecorderCustom
from sentimentanalyser.training.trainer import Trainer

In [22]:
from sentimentanalyser.utils.dev import print_dims
from sentimentanalyser.utils.metrics import accuracy
from sentimentanalyser.utils.data import listify, compose

In [11]:
from sentimentanalyser.models.rnn import AttnAWDModel
from sentimentanalyser.models.regularization import WeightDropout

In [12]:
import torch
import torchtext
from torch import nn
from torchtext import vocab
import matplotlib.pyplot as plt

In [13]:
# Cache folder for pretrained word embeddings. Set the EMBEDDINGS_CACHE_DIR
# environment variable to use another location; the original author's local
# path is the fallback, so the notebook behaves as before when it is unset.
path_cache = Path(os.environ.get("EMBEDDINGS_CACHE_DIR", '/home/anukoolpurohit/Documents/AnukoolPurohit/Models/WordEmbeddings'))

In [14]:
# GloVe vectors from torchtext, using the library's default name/dimension and
# path_cache as the cache location.
glove_eng = vocab.GloVe(cache=path_cache)

In [15]:
# Vocabulary held by the numericalize processor (presumably populated while the
# labelling step above ran — confirm in preprocessing.processor).
local_vocab = proc_num.vocab

## Custom Optimizer

In [23]:
class Optimizer():
    """First iteration of the custom optimizer: parameter groups plus stepper functions.

    Each parameter group gets its own copy of the hyper-parameters passed as
    keyword arguments; `step` hands every parameter with a gradient, the
    steppers, and that group's hyper-parameters to `compose`.
    """

    def __init__(self, params, steppers, **defaults):
        # `params` may be a generator, so turn it into a list before indexing.
        groups = list(params)
        first = groups[0]
        # A flat list of parameters is treated as one single group.
        self.param_groups = groups if isinstance(first, list) else [groups]
        # One independent hyper-parameter dict per group.
        self.hypers = [dict(defaults) for _ in self.param_groups]
        self.steppers = listify(steppers)

    def grad_params(self):
        """Return `(param, hyper)` pairs for every parameter whose `.grad` is set."""
        pairs = []
        for group, hyper in zip(self.param_groups, self.hypers):
            for param in group:
                if param.grad is not None:
                    pairs.append((param, hyper))
        return pairs

    def zero_grad(self):
        """Detach and zero the gradient of every parameter that has one."""
        for param, _ in self.grad_params():
            grad = param.grad
            grad.detach_()
            grad.zero_()

    def step(self):
        """Run the steppers on every parameter that has a gradient."""
        for param, hyper in self.grad_params():
            compose(param, self.steppers, **hyper)

### Basic SGD

In [24]:
def sgd_step(p, lr, **kwargs):
    """Apply one plain SGD update to `p` in place: ``p <- p - lr * p.grad``.

    Args:
        p: parameter tensor whose `.grad` is populated.
        lr: learning rate.
        **kwargs: remaining hyper-parameters (meant for other steppers); ignored.

    Returns:
        `p` itself, so steppers can be chained.
    """
    # Use the keyword `alpha` form; the positional `add_(scalar, tensor)`
    # overload used previously is deprecated in PyTorch.
    p.data.add_(p.grad.data, alpha=-lr)
    return p

### Weight Decay

In [49]:
def weight_decay(p, lr, wd, **kwargs):
    """Shrink `p` in place by the factor ``1 - lr * wd`` and return it.

    Args:
        p: parameter tensor (its `.data` is scaled in place).
        lr: learning rate.
        wd: weight-decay coefficient.
        **kwargs: remaining hyper-parameters; ignored.
    """
    shrink = 1 - lr*wd
    p.data.mul_(shrink)
    return p
# Default used when the caller supplies no `wd`: no decay.
weight_decay._defaults = {'wd': 0.}

In [50]:
def l2_reg(p, lr, wd, **kwargs):
    """Add the L2 penalty term to the gradient in place: ``p.grad <- p.grad + wd * p``.

    Args:
        p: parameter tensor whose `.grad` is populated.
        lr: learning rate; accepted for a uniform stepper signature, not used here.
        wd: regularisation coefficient.
        **kwargs: remaining hyper-parameters; ignored.

    Returns:
        `p` itself, so steppers can be chained.
    """
    # Use the keyword `alpha` form; the positional `add_(scalar, tensor)`
    # overload used previously is deprecated in PyTorch.
    p.grad.data.add_(p.data, alpha=wd)
    return p
# Default used when the caller supplies no `wd`: no regularisation.
l2_reg._defaults = dict(wd=0.)

## Add defaults

In [51]:
def maybe_update(objs, dest, f):
    """Copy into `dest` every key from `f(obj)` that `dest` does not already have.

    Objects are visited in order, so for a key missing from `dest` the first
    object that provides it wins. `dest` is modified in place.
    """
    for source in objs:
        for key, value in f(source).items():
            if key in dest:
                continue  # keep whatever is already there
            dest[key] = value

In [52]:
def get_defaults(d):
    """Return `d._defaults`, or a fresh empty dict when `d` has no such attribute."""
    try:
        return d._defaults
    except AttributeError:
        return {}

In [53]:
class Optimizer():
    """Custom optimizer built from parameter groups and composable stepper functions.

    Args:
        params: iterable of parameters (e.g. ``model.parameters()``) or a list
            of lists, one inner list per parameter group.
        steppers: a stepper function or a list of them; `step` passes them to
            `compose` together with each parameter and its hyper-parameters.
        **defaults: hyper-parameters applied to every group. Any key missing
            here is filled in from the steppers' `_defaults` attributes.

    Attributes:
        steppers: list of stepper functions.
        param_groups: list of parameter lists.
        hypers: one hyper-parameter dict per group, each an independent copy.
    """

    def __init__(self, params, steppers, **defaults):
        self.steppers = listify(steppers)
        # Explicit keyword arguments win; steppers only supply what is missing.
        maybe_update(self.steppers, defaults, get_defaults)

        # `params` may be a generator, so materialise it before indexing.
        self.param_groups = list(params)

        # Normalise a flat parameter list to a single group. The emptiness check
        # keeps an empty parameter list from raising IndexError on `[0]`; it
        # becomes one empty group and the optimizer is simply a no-op.
        if not self.param_groups or not isinstance(self.param_groups[0], list):
            self.param_groups = [self.param_groups]

        self.hypers = [{**defaults} for _ in self.param_groups]

    def grad_params(self):
        """Return `(param, hyper)` pairs for every parameter whose `.grad` is set."""
        return [(p, hyper) for pg, hyper in zip(self.param_groups, self.hypers)
                for p in pg if p.grad is not None]

    def zero_grad(self):
        """Detach and zero the gradient of every parameter that has one."""
        for p, _ in self.grad_params():
            p.grad.detach_()
            p.grad.zero_()

    def step(self):
        """Run the steppers on every parameter that has a gradient."""
        for p, hyper in self.grad_params():
            compose(p, self.steppers, **hyper)

# Training

In [56]:
def get_basic(Model, num_layers=2, lr=1e-3):
    """Build the (model, loss function, optimizer) triple used for training.

    Args:
        Model: model class, called as ``Model(proc_num.vocab, glove_eng, num_layers=...)``.
        num_layers: forwarded to `Model`.
        lr: initial learning-rate hyper-parameter stored in the optimizer.
            A scheduler callback that sets 'lr' is expected to overwrite it
            (assumption about ParamSchedulerCustom — confirm in callbacks.scheduler).

    Returns:
        Tuple ``(model, loss_func, opt)``.
    """
    model = Model(proc_num.vocab, glove_eng, num_layers=num_layers)
    loss_func = nn.CrossEntropyLoss()
    # weight_decay and sgd_step both take `lr` as a required argument and neither
    # declares a default for it, so seed the optimizer's hyper-parameters with one;
    # otherwise `opt.step()` fails unless something else has injected 'lr' first.
    opt = Optimizer(model.parameters(), steppers=[weight_decay, sgd_step], lr=lr)
    return model, loss_func, opt

In [57]:
# Two cosine phases combined into one schedule. Presumably 1e-4 -> 1e-3 over the
# first 30% of training, then 1e-3 -> 3e-5 over the remaining 70% — confirm the
# (start, end) and fraction semantics of sched_cos / combine_scheds in utils.callbacks.
sched = combine_scheds([0.3, 0.7], [sched_cos(1e-4, 1e-3), sched_cos(1e-3, 3e-5)])

In [58]:
# Callback factories handed to Trainer through its cb_funcs argument.
# partial() pre-binds each callback's configuration: the accuracy metric for the
# stats callback, the 'lr' hyper-parameter name and its schedule for the
# scheduler, and clip=0.1 for gradient clipping.
cbfs = [partial(AvgStatsCallback, [accuracy]),
        partial(ParamSchedulerCustom,'lr', [sched]),
        partial(GradientClipping, clip=0.1),
        ProgressCallback,
        CudaCallback,
        RecorderCustom
       ]

In [59]:
# get_basic returns (model, loss_func, opt); the tuple is unpacked into Trainer's
# positional arguments after the data.
trainer = Trainer(imdb_data, *get_basic(AttnAWDModel), cb_funcs=cbfs)

In [60]:
# Train for one epoch.
# NOTE(review): the recorded run below ends at loss ~0.693 (about ln 2) with 0.5
# accuracy on both train and validation, which is consistent with chance on a
# balanced two-class task — the model did not learn in this run. Possibly the
# learning rate / clip=0.1 combination is too small for plain SGD; to be confirmed.
trainer.fit(1)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.693275,0.5,0.693154,0.5,01:36


  parameters = list(filter(lambda p: p.grad is not None, parameters))
