In [0]:
from torchtext import data,datasets
from torchtext.vocab import GloVe,FastText,CharNGram
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torch
import sys

In [0]:
is_cuda = torch.cuda.is_available()

In [0]:
TEXT = data.Field(lower=True,fix_length=200,batch_first=False)
LABEL = data.Field(sequential=False,)

In [6]:
from torchtext import datasets
train, test = datasets.IMDB.splits(TEXT, LABEL)

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:07<00:00, 11.0MB/s]


In [7]:
TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300),max_size=10000,min_freq=10)
LABEL.build_vocab(train,)

.vector_cache/glove.6B.zip: 862MB [00:52, 16.6MB/s]                           
100%|█████████▉| 399307/400000 [00:39<00:00, 10251.32it/s]

In [8]:
train_iter, test_iter = data.BucketIterator.splits((train, test), batch_size=32, device=-1)
train_iter.repeat = False
test_iter.repeat = False

The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.
The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.


In [0]:
class IMDBRnn(nn.Module):
    
    def __init__(self,vocab,hidden_size,n_cat,bs=1,nl=2):
        super().__init__()
        self.hidden_size = hidden_size
        self.bs = bs
        self.nl = nl
        self.e = nn.Embedding(n_vocab,hidden_size)
        self.rnn = nn.LSTM(hidden_size,hidden_size,nl)
        self.fc2 = nn.Linear(hidden_size,n_cat)
        self.softmax = nn.LogSoftmax(dim=-1)
        
    def forward(self,inp):
        bs = inp.size()[1]
        if bs != self.bs:
            self.bs = bs
        e_out = self.e(inp)
        h0 = c0 = Variable(e_out.data.new(*(self.nl,self.bs,self.hidden_size)).zero_())
        rnn_o,_ = self.rnn(e_out,(h0,c0)) 
        rnn_o = rnn_o[-1]
        fc = F.dropout(self.fc2(rnn_o),p=0.8)
        return self.softmax(fc)

In [0]:
n_vocab = len(TEXT.vocab)
n_hidden = 100

In [0]:
model = IMDBRnn(n_vocab,n_hidden,3,bs=64)
model = model.cuda()

optimizer = optim.Adam(model.parameters(),lr=0.001)

def fit(epoch,model,data_loader,phase='training',volatile=False):
    if phase == 'training':
        model.train()
    if phase == 'validation':
        model.eval()
        volatile=True
    running_loss = 0.0
    running_correct = 0
    for batch_idx , batch in enumerate(data_loader):
        text , target = batch.text , batch.label
        if is_cuda:
            text,target = text.cuda(),target.cuda()
        
        if phase == 'training':
            optimizer.zero_grad()
        output = model(text)
        loss = F.nll_loss(output,target)
        
        running_loss += F.nll_loss(output,target).item()
        preds = output.data.max(dim=1,keepdim=True)[1]
        running_correct += preds.eq(target.data.view_as(preds)).cpu().sum()
        if phase == 'training':
            loss.backward()
            optimizer.step()
    
    loss = running_loss/len(data_loader.dataset)
    accuracy = 100. * running_correct/len(data_loader.dataset)
    
    print("Phase: {}".format(phase),
         "\tLoss is: {}".format(loss),
         "\tAccuray: {}".format(accuracy))
    return loss,accuracy

In [16]:
train_losses , train_accuracy = [],[]
val_losses , val_accuracy = [],[]

for epoch in range(1,100):

    epoch_loss, epoch_accuracy = fit(epoch,model,train_iter,phase='training')
    val_epoch_loss , val_epoch_accuracy = fit(epoch,model,test_iter,phase='validation')
    train_losses.append(epoch_loss)
    train_accuracy.append(epoch_accuracy)
    val_losses.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)

Phase: training 	Loss is: 0.03142602083921432 	Accuray: 24
Phase: validation 	Loss is: 0.031270977120399474 	Accuray: 24
Phase: training 	Loss is: 0.031386156566143034 	Accuray: 24
Phase: validation 	Loss is: 0.03130697370529175 	Accuray: 23
Phase: training 	Loss is: 0.03117419183731079 	Accuray: 24
Phase: validation 	Loss is: 0.03125808909654617 	Accuray: 24
Phase: training 	Loss is: 0.031038395426273346 	Accuray: 24
Phase: validation 	Loss is: 0.031164731388092042 	Accuray: 24
Phase: training 	Loss is: 0.03049092960834503 	Accuray: 24
Phase: validation 	Loss is: 0.03012761415719986 	Accuray: 23
Phase: training 	Loss is: 0.030075023064613342 	Accuray: 24
Phase: validation 	Loss is: 0.031222901406288146 	Accuray: 24
Phase: training 	Loss is: 0.029174556016921996 	Accuray: 25
Phase: validation 	Loss is: 0.029280479822158815 	Accuray: 24
Phase: training 	Loss is: 0.028137837705612182 	Accuray: 26
Phase: validation 	Loss is: 0.028660036118030547 	Accuray: 25
Phase: training 	Loss is: 0.02

KeyboardInterrupt: ignored