In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import numpy as np
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [2]:
# Record whether a CUDA device is available on this machine.
use_cuda = torch.cuda.is_available()
# Parenthesised single-argument form: prints the same text on Python 2 and is valid Python 3.
print(use_cuda)

True


In [3]:
def _loadLongTensor(path):
    """Read a tab-separated file of integers into a LongTensor.

    The tensor is moved to the GPU only when `use_cuda` is true, so the
    notebook can also load its data on a CPU-only machine.
    """
    values = np.loadtxt(path, delimiter="\t", dtype=np.int64)
    tensor = torch.from_numpy(values)
    return tensor.cuda() if use_cuda else tensor


# Training and validation splits: X* hold the inputs, Y* the matching targets.
Xtrain = _loadLongTensor('Xtrain.tsv')
Ytrain = _loadLongTensor('Ytrain.tsv')
Xvalid = _loadLongTensor('Xvalid.tsv')
Yvalid = _loadLongTensor('Yvalid.tsv')

In [11]:
# First 40 examples of each split (all columns), kept as small subsets of the full tensors.
smallXtrain, smallYtrain = Xtrain[:40], Ytrain[:40]
smallXvalid, smallYvalid = Xvalid[:40], Yvalid[:40]

In [5]:
# Shapes of the full training inputs and targets.
# Parenthesised single-argument print gives the same output on Python 2 and also parses on Python 3.
print(Xtrain.size())
print(Ytrain.size())

torch.Size([86402, 3])
torch.Size([86402])


In [6]:
# Shapes of the 40-example subset followed by the full training tensors, for comparison.
# Parenthesised single-argument print gives the same output on Python 2 and also parses on Python 3.
print(smallXtrain.size())
print(smallYtrain.size())
print(Xtrain.size())
print(Ytrain.size())

torch.Size([40, 3])
torch.Size([40])
torch.Size([86402, 3])
torch.Size([86402])


In [7]:
# Pair every input row with its target so a DataLoader can batch the two together.
trainData, validData = (
    TensorDataset(Xtrain, Ytrain),
    TensorDataset(Xvalid, Yvalid),
)

In [8]:
def computePerplexity(output, target):
    """Return the perplexity of `target` under the unnormalised scores in `output`.

    Args:
        output: 2-D tensor/Variable of per-class scores, one row per example
            (classes lie along dimension 1).
        target: 1-D LongTensor/Variable with the correct class index of each row.

    Returns:
        exp(-(mean log-probability assigned to the correct classes)) as a
        NumPy float.

    Raises:
        ZeroDivisionError: if `output` has no rows.
    """
    m = output.size()[0]
    # Name the class dimension explicitly; relying on LogSoftmax's implicit
    # dimension is deprecated and ambiguous.
    logProbs = nn.LogSoftmax(dim=1)(output)
    # Pick log p(target_i) out of every row, then add the rows up.
    targetLogProbs = torch.gather(logProbs, 1, torch.unsqueeze(target, 1))
    summed = torch.sum(targetLogProbs)
    # sum() yields a 1-element tensor on old PyTorch and a 0-dim tensor on new
    # ones; view(-1)[0] reads the single value in both cases, whereas
    # `.data[0]` fails on 0-dim tensors.
    totalLogProb = float(summed.data.view(-1)[0])
    # 2 ** (-(1/m) * total / ln 2) is the same quantity as exp(-total / m).
    return np.exp(-totalLogProb / m)

In [9]:
class LanguageHyperParameter(object):
    """Plain container bundling the settings of one language-model training run.

    Every constructor argument is stored unchanged as an attribute of the
    same name.
    """
    def __init__(self, gramSize, embeddingDimension, numHiddenUnits, numWords, numEpochs, lr, batchSize):
        # Settings that determine the model's shape.
        self.numWords = numWords
        self.gramSize = gramSize
        self.embeddingDimension = embeddingDimension
        self.numHiddenUnits = numHiddenUnits
        # Settings that drive the optimisation loop.
        self.lr = lr
        self.batchSize = batchSize
        self.numEpochs = numEpochs

In [10]:
class RecurrentLanguageModeler(nn.Module):
    """Single-layer tanh RNN that scores every vocabulary word from a window of word ids.

    The input ids are embedded, fed through the RNN one position at a time,
    and the final hidden state is mapped linearly to one score per word.
    """
    def __init__(self, hyperparams):
        """Build the layers.

        Args:
            hyperparams: object providing `numWords`, `embeddingDimension`,
                `gramSize` and `numHiddenUnits`.
        """
        super(RecurrentLanguageModeler, self).__init__()
        numWords = hyperparams.numWords
        embeddingDimension = hyperparams.embeddingDimension
        self.contextSize = hyperparams.gramSize-1
        self.numHiddenUnits = hyperparams.numHiddenUnits
        self.embeddings = nn.Embedding(numWords, embeddingDimension)
        self.rnn = nn.RNN(input_size=embeddingDimension,
                          hidden_size=self.numHiddenUnits,
                          nonlinearity='tanh',
                          batch_first=False)
        # Size the output layer from the hyperparameters that were passed in,
        # not from a notebook-level global that may be missing or different.
        self.h2o = nn.Linear(self.numHiddenUnits, numWords)

    def forward(self, X):
        """Return unnormalised word scores for a batch of contexts.

        Args:
            X: LongTensor/Variable of word ids, one row per example.

        Returns:
            Tensor/Variable with one row per example and `numWords` columns.
        """
        embeds = self.embeddings(X)
        # The RNN was built with batch_first=False, so put the sequence axis first.
        embeds = embeds.permute(1,0,2)
        # No explicit initial state: nn.RNN then starts from zeros created with
        # the input's own type/device, so the model is not tied to CUDA.
        rnn_out, h_n = self.rnn(embeds)
        # h_n is (1, batch, hidden). Index away only the leading layer axis;
        # a blanket squeeze() would also drop the batch axis when batch == 1.
        output = self.h2o(h_n[0])
        return output

In [33]:
def trainModel(trainData, validData, hyperparams):
    """Train a RecurrentLanguageModeler with momentum-free SGD, evaluating after every epoch.

    Args:
        trainData: dataset of (input, target) pairs used for the gradient updates.
        validData: dataset of (input, target) pairs, scored as a single batch
            at the end of each epoch.
        hyperparams: object providing the model settings plus `numEpochs`,
            `lr` and `batchSize`.

    Returns:
        A list with one row per epoch:
        [epoch, mean training loss, mean validation loss, validation perplexity].
        The trained model itself is not returned.
    """
    numEpochs = hyperparams.numEpochs
    m = len(trainData)
    n = len(validData)
    # Use the GPU when there is one instead of calling .cuda() unconditionally.
    useGpu = torch.cuda.is_available()

    model = RecurrentLanguageModeler(hyperparams)
    if useGpu:
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=hyperparams.lr, momentum=0.0)
    # size_average=False sums the loss over a batch, so dividing the running
    # totals by the dataset sizes below yields per-example means.
    criterion = nn.CrossEntropyLoss(size_average=False)

    def prepare(Xbatch, Ybatch):
        # Targets must be long integers; both tensors follow the model's device.
        Ybatch = Ybatch.long()
        if useGpu:
            Xbatch, Ybatch = Xbatch.cuda(), Ybatch.cuda()
        return Variable(Xbatch), Variable(Ybatch)

    def scalar(value):
        # Works for both 1-element and 0-dim loss tensors; `.data[0]` fails on the latter.
        return float(value.data.view(-1)[0])

    # The loaders are loop-invariant: a shuffling DataLoader draws a new
    # permutation every time it is iterated, so one instance serves all epochs.
    dataloader = DataLoader(trainData, batch_size=hyperparams.batchSize, shuffle=True)
    validDataLoader = DataLoader(validData, batch_size=n, shuffle=False)

    stats = []
    for e in range(numEpochs):
        total_loss = 0.0
        for Xbatch, Ybatch in dataloader:
            Xbatch, Ybatch = prepare(Xbatch, Ybatch)
            output = model(Xbatch)
            loss = criterion(output, Ybatch)
            model.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += scalar(loss)

        totalValidationLoss = 0.0
        for Xbatch, Ybatch in validDataLoader:
            Xbatch, Ybatch = prepare(Xbatch, Ybatch)
            output = model(Xbatch)
            loss = criterion(output, Ybatch)
            # The validation set arrives as one batch, so this is the
            # perplexity of the whole set.
            perplexity = computePerplexity(output, Ybatch)
            totalValidationLoss += scalar(loss)

        row = [e+1, total_loss/m, totalValidationLoss/n, perplexity]
        stats.append(row)
        # Space-separated str() of each value: the same text the Python 2
        # `print a, b, c, d` statement produced, and valid on Python 3 as well.
        print("%s %s %s %s" % tuple(row))
    return stats

In [34]:
# Model settings.
numWords = 8000
gramSize = 4
embeddingDimension = 16
numHiddenUnits = 128
# Optimisation settings.
lr = 0.01
batchSize = 16
numEpochs = 100

hyperparams = LanguageHyperParameter(
    gramSize=gramSize,
    embeddingDimension=embeddingDimension,
    numHiddenUnits=numHiddenUnits,
    numWords=numWords,
    numEpochs=numEpochs,
    lr=lr,
    batchSize=batchSize,
)
# Prefix for every file written for this run; it encodes the main settings.
experiment_name = "torch_rnn_language__h_%s_lr_%s_b_%s_ed_%s" % (
    numHiddenUnits, lr, batchSize, embeddingDimension)

In [35]:
# Run the full training loop; `stats` receives one row of metrics per epoch.
stats = trainModel(trainData=trainData,
                   validData=validData,
                   hyperparams=hyperparams)

TypeError: 'AddmmBackward' object is not callable

In [19]:
import matplotlib.pyplot as plt

# One row per epoch: epoch number, train loss, validation loss, validation perplexity.
stats = np.array(stats)
epochs, train_loss, val_loss, val_perplexity = stats.T

# Training (blue) and validation (red) loss per epoch.
fig, ax = plt.subplots()
ax.plot(epochs, train_loss, 'b', label='train')
ax.plot(epochs, val_loss, 'r', label='validation')
# `bottom` is the supported keyword; the old `ymin` alias was removed from matplotlib.
ax.set_ylim(bottom=0)
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean loss per example')
ax.set_title('Loss plot')
ax.legend()
fig.savefig(experiment_name+"loss_plot.pdf")

# Validation perplexity per epoch.
fig, ax = plt.subplots()
ax.plot(epochs, val_perplexity)
ax.set_ylim(bottom=0)
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation perplexity')
ax.set_title('Perplexity plot')
fig.savefig(experiment_name+"perplexity_plot.pdf")


# Dump the raw numbers as tab-separated text, one epoch per line.
with open(experiment_name+".language_stats",'w') as f:
    for stat in stats:
        f.write("\t".join(str(val) for val in stat) + "\n")