In [13]:
import math
import time
import takepod
from takepod.datasets import Iterator, BasicSupervisedImdbDataset
from takepod.storage import Field, Vocab
from takepod.storage.vectorizers.impl import GloVe
from takepod.models import Experiment, AbstractSupervisedModel
from takepod.models.trainer import AbstractTrainer

def lowercase(raw, data):
    return raw, [d.lower() for d in data]

In [2]:
fields = BasicSupervisedImdbDataset.get_default_fields()

In [3]:
def create_fields():
    # Define the vocabulary
    vocab = Vocab(max_size=20000, min_freq=5)
    text = Field(name='text', vocab=Vocab(), tokenizer='spacy', store_as_raw=False)
    # Add preprpocessing hooks to model
    # 1. Lowercase
    text.add_posttokenize_hook(lowercase)
        
    # Improve readability: LabelField
    label = Field(name='label', vocab=Vocab(specials=()), is_target=True, tokenize=False)
    return {text.name : text, label.name: label}

In [4]:
fields = create_fields()
imdb_train, imdb_test = BasicSupervisedImdbDataset.get_train_test_dataset(fields)

In [5]:
# Construct vectoziter based on vocab
vocab = fields['text'].vocab
vectorizer = GloVe()
vectorizer.load_vocab(vocab)
embeddings = vectorizer.get_embedding_matrix(vocab)

In [6]:
# Works on simplify vectorizer branch
embeddings = GloVe().load_vocab(vocab)
print(embeddings)

[[ 0.         0.         0.        ...  0.         0.         0.       ]
 [ 0.         0.         0.        ...  0.         0.         0.       ]
 [ 0.04656    0.21318   -0.0074364 ...  0.0090611 -0.20989    0.053913 ]
 ...
 [ 0.         0.         0.        ...  0.         0.         0.       ]
 [ 0.         0.         0.        ...  0.         0.         0.       ]
 [ 0.         0.         0.        ...  0.         0.         0.       ]]


In [7]:
train_iterator = Iterator(dataset=imdb_train, batch_size=32)
valid_iterator = Iterator(dataset=imdb_train, batch_size=32)

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
RNNS = ['LSTM', 'GRU']

class Encoder(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, nlayers=1, dropout=0.,
                   bidirectional=True, rnn_type='GRU'):
        super(Encoder, self).__init__()
        
        self.bidirectional = bidirectional
        assert rnn_type in RNNS, 'Use one of the following: {}'.format(str(RNNS))
        rnn_cell = getattr(nn, rnn_type) # fetch constructor from torch.nn, cleaner than if
        self.rnn = rnn_cell(embedding_dim, hidden_dim, nlayers, 
                                dropout=dropout, bidirectional=bidirectional)

    def forward(self, input, hidden=None):
        return self.rnn(input, hidden)


class Attention(nn.Module):
    def __init__(self, query_dim, key_dim, value_dim):
        super(Attention, self).__init__()
        self.scale = 1. / math.sqrt(query_dim)

    def forward(self, query, keys, values):
        # Query = [BxQ]
        # Keys = [TxBxK]
        # Values = [TxBxV]
        # Outputs = a:[TxB], lin_comb:[BxV]

        # Here we assume q_dim == k_dim (dot product attention)

        query = query.unsqueeze(1) # [BxQ] -> [Bx1xQ]
        keys = keys.transpose(0,1).transpose(1,2) # [TxBxK] -> [BxKxT]
        energy = torch.bmm(query, keys) # [Bx1xQ]x[BxKxT] -> [Bx1xT]
        energy = F.softmax(energy.mul_(self.scale), dim=2) # scale, normalize

        values = values.transpose(0,1) # [TxBxV] -> [BxTxV]
        linear_combination = torch.bmm(energy, values).squeeze(1) #[Bx1xT]x[BxTxV] -> [BxV]
        return energy, linear_combination

class AttentionRNN(nn.Module):
    def __init__(self, cfg):
        super(AttentionRNN, self).__init__()
        self.config = cfg
        self.embedding = nn.Embedding(cfg.vocab_size, cfg.embed_dim)
        self.encoder = Encoder(cfg.embed_dim, cfg.hidden_dim, cfg.nlayers, 
                               cfg.dropout, cfg.bidirectional, cfg.rnn_type)
        attention_dim = cfg.hidden_dim if not cfg.bidirectional else 2 * cfg.hidden_dim
        self.attention = Attention(attention_dim, attention_dim, attention_dim)
        self.decoder = nn.Linear(attention_dim, cfg.num_classes)

        size = 0
        for p in self.parameters():
            size += p.nelement()
        print('Total param size: {}'.format(size))


    def forward(self, input):
        outputs, hidden = self.encoder(self.embedding(input))
        if isinstance(hidden, tuple): # LSTM
            hidden = hidden[1] # take the cell state

        if self.encoder.bidirectional: # need to concat the last 2 hidden layers
            hidden = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden = hidden[-1]

        energy, linear_combination = self.attention(hidden, outputs, outputs) 
        logits = self.decoder(linear_combination)
        meta = {'attention_weights':energy}

        return logits, meta

In [9]:
class TorchModel(AbstractSupervisedModel):
    def __init__(self, model_class, config, criterion):
        self.model_class = model_class
        self.config = config
        self._model = model_class(config)
        self.criterion = criterion

    def __call__(self, x):
        return self._model(x)

    def fit(self, X, y, **kwargs):
        # This is a _step_ in the iteration process.
        # Should assume model is in training mode
        return_dict = self(X)
        logits = return_dict['pred']
        loss = self.criterion(logits.view(-1, self.config.nlabels), y.squeeze())
        return_dict['loss'] = loss
        return return_dict
        
    def predict(self, X, **kwargs):
        # Assumes that the model is in _eval_ mode
        return_dict = self(X)
        return return_dict
        
    def reset(self, **kwargs):
        # Restart model
        self._model = self.model_class(self.config)
        
    def zero_grad(self):
        self._model.zero_grad()
    
    def set_train_mode(self):
        self._model.train()
    
    def set_eval_mode(self):
        self._model.eval()
        
    def parameters(self):
        return self._model.parameters()

In [10]:
class TorchTrainer(AbstractTrainer):
    def __init__(self, optimizer, num_epochs, valid_iterator=None):
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.valid_iterator = valid_iterator
    
    def train(self,
              model: AbstractSupervisedModel,
              iterator: Iterator,
              feature_transformer,
              label_transform_fun,
              **kwargs):
        # Actual training loop
        # Single training epoch
        model.set_train_mode()
        for batch_num, (batch_x, batch_y) in enumerate(iterator):
            t = time.time()
            X = torch.from_numpy(
                feature_transformer.transform(batch_x)
                )
            y = torch.from_numpy(
                label_transform_fun(batch_y)
                )

            model.zero_grad()
            return_dict = model.fit(X, y)
            loss = return_dict['loss']   
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self._model.parameters(), self.config.clip)
            self.optimizer.step()
            print("[Batch]: {}/{} in {:.5f} seconds".format(
                    batch_num, len(iterator), time.time() - t), end='\r', flush=True)
            
        model.set_eval_mode()
        with torch.no_grad():
            for batch_num, batch_x, batch_y in enumerate(self.valid_iterator):
                X = feature_transformer.transform(batch_x)
                y = label_transform_fun(batch_y)

                return_dict = model.fit(X, y)

                loss = return_dict['loss']

In [11]:
class Config(dict):
    def __init__(self, *args, **kwargs): 
        dict.__init__(self, *args, **kwargs)     
            
    def __getattr__(self, key):
        #print(key)
        return self[key]

    def __setattr__(self, key, value):
        #print(key, value)
        self[key] = value

In [19]:
criterion = nn.CrossEntropyLoss()
label_vocab = fields['label'].vocab
# Ugly but just to check
config_dict = {
    'rnn_type': 'LSTM',
    'embed_dim': 300,
    'hidden_dim': 300,
    'nlayers': 2,
    'lr': 1e-3,
    'clip': 5,
    'epochs': 5,
    'batch_size': 32,
    'dropout': 0.,
    'bidirectional': True,
    'cuda': False,
    'vocab_size': len(vocab),
    'num_classes': len(label_vocab)
}

config = Config(config_dict)
print(config)
print(dir(config), vars(config))
print(config.vocab_size)

#model = TorchModel(AttentionRNN, config, criterion)

optimizer = optimizer = torch.optim.Adam(model.parameters(), config.lr, amsgrad=True)
trainer = TorchTrainer(optimizer, config.epochs, valid_iterator)


{'rnn_type': 'LSTM', 'embed_dim': 300, 'hidden_dim': 300, 'nlayers': 2, 'lr': 0.001, 'clip': 5, 'epochs': 5, 'batch_size': 32, 'dropout': 0.0, 'bidirectional': True, 'cuda': False, 'vocab_size': 146592, 'num_classes': 2}
['__class__', '__contains__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values'] {}
146592


In [None]:
from functools import partial
train_iterator = partial(Iterator, batch_size=32)
valid_iterator = Iterator(dataset=imdb_train, batch_size=32)

experiment = Experiment(TorchModel, trainer=trainer, 
                        training_iterator_callable=train_iterator)
experiment.fit(
    imdb_train,
    model_kwargs={
        'model_class': AttentionRNN, 
        'config': config, 
        'criterion': criterion
    },
)

Total param size: 47588402
