In [26]:
import pickle
import sys
import argparse

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import numpy as np


def train(model, train, val, num_epochs, batch_size):
    data_train, labels_train = train
    num_train = data_train.shape[1]
    print(num_train)
    
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.Adam(model.parameters())
    
    for epoch in range(num_epochs):
        print('Beginning epoch %d / %d' % (epoch+1, num_epochs))
        check_accuracy(model, val)
        
        for i in range(num_train//batch_size):
            indices = list(range(i*batch_size, (i+1)*batch_size))
            
            X = torch.Tensor(data_train[:, indices, :])
            y = torch.LongTensor(labels_train[indices])
            X_var = Variable(X.type(gpu_dtype))
            y_var = Variable(y.type(gpu_dtype).long())
            
            scores = model(X_var)
            loss = criterion(scores, y_var)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, data):
    url_array, label_array = data
    num_samples = label_array.shape[0]
    
    X = torch.Tensor(url_array)
    y = torch.LongTensor(label_array)
    X_var = Variable(X.type(gpu_dtype))
    
    model.eval()
    scores = model(X_var)
    _, preds = scores.data.cpu().max(1)
    
    num_correct = (preds == y).sum()
    
    print('accuracy: ', num_correct / num_samples)
    # further metrics to monitor due to class imbalance
    print('true pos:', label_array.sum() / num_samples)
    print('pred pos:', preds.sum() / num_samples)

In [27]:
# configure GPU datatype
use_gpu = torch.cuda.is_available()
if not use_gpu:
    raise Exception('error: CUDA library unavailable')

global gpu_dtype
gpu_dtype = torch.cuda.FloatTensor

# load train, val, test data
with open('datasets'+'/data.pkl', 'rb') as f:
    url_array, label_array = pickle.load(f)

# partition dataset (this must sum to <50K)
num_train = 40000
num_val = 2000
num_test = 8000

data_train = url_array[:, :num_train, :]
labels_train = label_array[:num_train]
data_val = url_array[:, num_train:num_train+num_val, :]
labels_val = label_array[num_train:num_train+num_val]
data_test = url_array[:, num_train+num_val:num_train+num_val+num_test, :]
labels_test = label_array[num_train+num_val:num_train+num_val+num_test]

In [24]:
num_epochs = 5000
batch_size = 32

from model import CharRNN

# initialize model and train on GPU
model = CharRNN()
model = model.type(gpu_dtype)

# train model on training data, reporting accuracy on held out validation set
train(model, (data_train, labels_train), (data_val, labels_val),
      num_epochs, batch_size)

40000
Beginning epoch 1 / 5000
accuracy:  0.519
true pos: 0.486
pred pos: 0.007
Beginning epoch 2 / 5000
accuracy:  0.494
true pos: 0.486
pred pos: 0.991
Beginning epoch 3 / 5000
accuracy:  0.725
true pos: 0.486
pred pos: 0.284
Beginning epoch 4 / 5000
accuracy:  0.819
true pos: 0.486
pred pos: 0.455
Beginning epoch 5 / 5000
accuracy:  0.843
true pos: 0.486
pred pos: 0.456
Beginning epoch 6 / 5000
accuracy:  0.8835
true pos: 0.486
pred pos: 0.4745
Beginning epoch 7 / 5000
accuracy:  0.889
true pos: 0.486
pred pos: 0.448
Beginning epoch 8 / 5000
accuracy:  0.88
true pos: 0.486
pred pos: 0.423
Beginning epoch 9 / 5000
accuracy:  0.9195
true pos: 0.486
pred pos: 0.4395
Beginning epoch 10 / 5000
accuracy:  0.9315
true pos: 0.486
pred pos: 0.4515
Beginning epoch 11 / 5000
accuracy:  0.942
true pos: 0.486
pred pos: 0.454
Beginning epoch 12 / 5000
accuracy:  0.8785
true pos: 0.486
pred pos: 0.5395
Beginning epoch 13 / 5000
accuracy:  0.928
true pos: 0.486
pred pos: 0.521
Beginning epoch 14 / 

KeyboardInterrupt: 