In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.autograd import Variable

from tensorboardX import SummaryWriter
from tqdm import tqdm as tqdm

CUDA = torch.cuda.is_available()

import numpy as np

from sklearn.metrics import accuracy_score

import torchtext
from collections import Counter

In [72]:
CUDA = False

In [73]:
text = torchtext.data.Field(
    lower=True, include_lengths=False, fix_length=2048, tensor_type=torch.FloatTensor, batch_first=True,
    tokenize=lambda x: x, use_vocab=False, sequential=False
)
label = torchtext.data.Field(sequential=False, use_vocab=False)

train, test = torchtext.datasets.IMDB.splits(text, label)

c = Counter(''.join([' '.join(t.text) for t in train]))

ALPHABET = [char[0] for char in c.most_common(62)]  # all other chars used less ~ 100 times in a test
ALPHABET.append('UNK')
ALPHABET.append('PAD')

ALPHABET_LEN = len(ALPHABET)

char2int = {s: i for s, i in zip(ALPHABET, range(ALPHABET_LEN))}

MAXLEN = 1024

BATCH_SIZE = 64
TEST_SIZE = 100

In [74]:

def one_hot(char):
    zeros = np.zeros(ALPHABET_LEN)
    if char in char2int:
        zeros[char2int[char]] = 1.
    else:
        zeros[char2int['UNK']] = 1.

def preprocess_text(text, maxlen=MAXLEN, batch_size=BATCH_SIZE):
    text = [t.lower() for t in text]
    one_hotted_text = np.zeros((batch_size, maxlen, ALPHABET_LEN))
    assert len(text) == batch_size
    for bi, batch in enumerate(text):
        for i, char in enumerate(batch):
            if i >= MAXLEN:
                break
            one_hotted_text[bi, i, char2int.get(char, char2int['UNK'])] = 1.
        if i < MAXLEN:
            for j in range(i+1, MAXLEN):
                one_hotted_text[bi, j, char2int['PAD']] = 1.

    return torch.FloatTensor(one_hotted_text)

all_texts = [t.text for t in train]
all_labels = [int(t.label == 'pos') for t in train]

from sklearn.utils import shuffle
X, y = shuffle(all_texts, all_labels)

batch_idx = 0

def next_batch():
    # BATCH_SIZE(32), ALPHABET_LEN(128), MAXLEN(512)
    global batch_idx
    batch = X[batch_idx:batch_idx+BATCH_SIZE], y[batch_idx:batch_idx+BATCH_SIZE]
    batch_idx += BATCH_SIZE
    return batch

def clip_gradient(optimizer, grad_clip):
    for group in optimizer.param_groups:
        for param in group['params']:
            if param.grad is not None and param.requires_grad:
                param.grad.data.clamp_(-grad_clip, grad_clip)

# Model

In [97]:
class CharCNN(nn.Module):
    
    def __init__(self, hidden_dim=256, kernel_size=16):
        super(CharCNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size

        self.cnn = nn.Sequential(
            nn.Conv1d(ALPHABET_LEN, hidden_dim, kernel_size),
            nn.MaxPool1d(2),
            nn.ReLU()
        )
        
        self.projector = nn.Linear(hidden_dim*3, 2)
    
    def forward(self, inp):
        hidden = self.cnn(inp)
        out = self.projector(hidden)
        return out
    
    def describe(self):
        return '_char_cnn_%s_%s' % (self.hidden_dim, self.kernel_size)

In [86]:
model = CharCNN(hidden_dim=256, kernel_size=64)
if CUDA:
    model.cuda()
model.train()

CharCNN(
  (cnn): Sequential(
    (0): Conv1d(64, 256, kernel_size=(64,), stride=(1,))
    (1): MaxPool1d(kernel_size=256, stride=256, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
  )
  (projector): Linear(in_features=768, out_features=2, bias=True)
)

In [98]:
projector = nn.Linear(3, 2)
model.cnn(ptex).size()

torch.Size([1, 256, 3])

# Training

In [87]:
writer = SummaryWriter(comment=model.describe())

In [88]:
optimizer = optim.Adam(params=model.parameters(), lr=10**-4)
optimizer.zero_grad()

In [89]:
global_step = 0

In [90]:
N_EPOCHS = 1

loss_f = F.cross_entropy

for epoch in range(N_EPOCHS):
    global batch_idx
    batch_idx = 0
    X, y = shuffle(X, y)
    while batch_idx < len(X) - BATCH_SIZE:
        text, label = next_batch()

        label = Variable(torch.LongTensor(label).cuda()) if CUDA else Variable(torch.LongTensor(label))

        global_step += 1

        one_hotted_text = preprocess_text(text)
        one_hotted_text = Variable(one_hotted_text.cuda()) if CUDA else Variable(one_hotted_text)
        one_hotted_text = one_hotted_text.permute(0, 2, 1)  # (1, 0, 2) for RNN
        prediction = model(one_hotted_text)

        loss = loss_f(prediction, label)

        writer.add_scalar('loss', loss.data[0], global_step=global_step)

        loss.backward()        
        clip_gradient(optimizer, 1e-1)
        optimizer.step()

    # evaluation
    print('Loss after epoch %s:' % epoch)
    print(loss.data[0])
        
    _, idx = torch.max(prediction, 1)
    acc = accuracy_score(label.data.tolist(), idx.data.tolist())
    writer.add_scalar('accuracy_train', acc, global_step=global_step)
    print('In-batch accuracy:', acc)
    
    model.eval()

    predictions = []

    test_texts, test_labels = shuffle(test_texts, test_labels)
    
    for t in test_texts[:TEST_SIZE]:

        ptex = preprocess_text([t], batch_size=1)
        ptex = Variable(ptex.cuda() if CUDA else ptex)
        ptex = ptex.permute(0, 2, 1)  # (1, 0, 2) for RNN
        pred = model(ptex)
        _, idx = torch.max(pred, 1)

        predictions.append(idx.data[0])
    
    lables = test_labels[:TEST_SIZE]
    
    acc = accuracy_score(lables, predictions)
    print('Test accuracy:', acc)
    writer.add_scalar('accuracy_test', acc, global_step=global_step)

    model.train()


RuntimeError: size mismatch, m1: [16384 x 3], m2: [768 x 2] at /pytorch/torch/lib/TH/generic/THTensorMath.c:1434

In [36]:
ptex = preprocess_text(['Hello, world!'], batch_size=1)
ptex = Variable(ptex.cpu())
ptex = ptex.permute(0, 2, 1)

In [39]:
model(ptex)

torch.Size([1, 128, 7])


AssertionError: 