In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data, datasets

In [2]:
BATCH_SIZE = 100
LR = 0.001
EPOCHS = 15
USE_CUDA = torch.cuda.is_available()

In [3]:
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
TEXT = data.Field(sequential = True, batch_first = True, lower = True)
LABEL = data.Field(sequential = False, batch_first = True)

In [4]:
trainset, testset = datasets.IMDB.splits(TEXT, LABEL)

In [5]:
TEXT.build_vocab(trainset, min_freq = 5)
LABEL.build_vocab(trainset)

In [6]:
trainset, valset = trainset.split(split_ratio = 0.8)
train_iter, val_iter, test_iter = data.BucketIterator.splits((trainset, valset, testset), batch_size = BATCH_SIZE, shuffle = True, repeat = False)

In [7]:
vocab_size = len(TEXT.vocab)
n_classes = 2
print("[TRAIN]: %d \t [VALID]: %d \t [TEST]: %d \t [VOCAB] %d \t [CLASSES] %d" % (len(trainset), len(valset), len(testset), vocab_size, n_classes))

[TRAIN]: 20000 	 [VALID]: 5000 	 [TEST]: 25000 	 [VOCAB] 46159 	 [CLASSES] 2


In [8]:
class BasicRNN(nn.Module):
    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p = 0.2):
        super(BasicRNN, self).__init__()
        print("Building RNN")
        self.n_layers = n_layers
        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)
        self.rnn = nn.RNN(embed_dim, self.hidden_dim, num_layers = self.n_layers, batch_first = True)
        self.out = nn.Linear(self.hidden_dim, n_classes)
        
    def forward(self, x):
        x = self.embed(x)
        h_0 = self._init_state(batch_size = x.size(0))
        x, _ = self.rnn(x, h_0)
        h_t = x[:, -1, :]
        self.dropout(h_t)
        logit = torch.sigmoid(self.out(h_t))
        return logit
    
    def _init_state(self, batch_size = 1):
        weight = next(self.parameters()).data
        return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [9]:
def train(model, optimizer, train_iter):
    model.train()
    for b, batch in enumerate(train_iter):
        x, y = batch.text.to(DEVICE), batch.label.to(DEVICE)
        y.data.sub_(1)
        optimizer.zero_grad()
        
        logit = model(x)
        loss = F.cross_entropy(logit, y)
        loss.backward()
        optimizer.step()
        
        if b % 50 == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(e, 
                                                                           b * len(x), 
                                                                           len(train_iter.dataset), 
                                                                           100. * b / len(train_iter), 
                                                                           loss.item()))

In [10]:
def evaluate(model, val_iter):
    model.eval()
    corrects, total_loss = 0, 0
    
    for batch in val_iter:
        x, y = batch.text.to(DEVICE), batch.label.to(DEVICE)
        y.data.sub_(1)
        logit = model(x)
        loss = F.cross_entropy(logit, y, reduction = "sum")
        total_loss += loss.item()
        corrects += (logit.max(1)[1].view(y.size()).data == y.data).sum()
        
    size = len(val_iter.dataset)
    avg_loss = total_loss / size
    avg_accuracy = 100.0 * corrects / size
    return avg_loss, avg_accuracy

In [11]:
model = BasicRNN(n_layers = 1, hidden_dim = 256, n_vocab = vocab_size, embed_dim = 128, n_classes = n_classes, dropout_p = 0.5).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
for e in range(1, EPOCHS + 1):
    train(model, optimizer, train_iter)
    val_loss, val_accuracy = evaluate(model, val_iter)
    print("[EPOCH: %d], Validation Loss: %5.2f | Validation Accuracy: %5.2f" % (e, val_loss, val_accuracy))

Building RNN
[EPOCH: 1], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 2], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 3], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 4], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 5], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 6], Validation Loss:  0.70 | Validation Accuracy: 49.00
[EPOCH: 7], Validation Loss:  0.70 | Validation Accuracy: 49.00
[EPOCH: 8], Validation Loss:  0.70 | Validation Accuracy: 49.00
[EPOCH: 9], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 10], Validation Loss:  0.69 | Validation Accuracy: 51.00
[EPOCH: 11], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 12], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 13], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 14], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 15], Validation Loss:  0.69 | Validation Accuracy: 50.00


In [12]:
test_loss, test_acc = evaluate(model,test_iter)
print("Test Loss: %5.2f | Test Accuracy: %5.2f" % (test_loss, test_acc))

Test Loss:  0.70 | Test Accuracy: 50.00


In [13]:
class BasicGRU(nn.Module):
    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p = 0.2):
        super(BasicGRU, self).__init__()
        print("Building GRU")
        self.n_layers = n_layers
        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(embed_dim, self.hidden_dim, num_layers = self.n_layers, batch_first = True)
        self.out = nn.Linear(self.hidden_dim, n_classes)
        
    def forward(self, x):
        x = self.embed(x)
        h_0 = self._init_state(batch_size = x.size(0))
        x, _ = self.gru(x, h_0)
        h_t = x[:, -1, :]
        self.dropout(h_t)
        logit = torch.sigmoid(self.out(h_t))
        return logit
    
    def _init_state(self, batch_size = 1):
        weight = next(self.parameters()).data
        return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [14]:
model = BasicGRU(n_layers = 1, hidden_dim = 256, n_vocab = vocab_size, embed_dim = 128, n_classes = n_classes, dropout_p = 0.5).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
for e in range(1, EPOCHS + 1):
    train(model, optimizer, train_iter)
    val_loss, val_accuracy = evaluate(model, val_iter)
    print("[EPOCH: %d], Validation Loss: %5.2f | Validation Accuracy: %5.2f" % (e, val_loss, val_accuracy))

Building GRU
[EPOCH: 1], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 2], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 3], Validation Loss:  0.69 | Validation Accuracy: 51.00
[EPOCH: 4], Validation Loss:  0.69 | Validation Accuracy: 52.00
[EPOCH: 5], Validation Loss:  0.69 | Validation Accuracy: 50.00
[EPOCH: 6], Validation Loss:  0.63 | Validation Accuracy: 67.00
[EPOCH: 7], Validation Loss:  0.48 | Validation Accuracy: 82.00
[EPOCH: 8], Validation Loss:  0.46 | Validation Accuracy: 85.00
[EPOCH: 9], Validation Loss:  0.46 | Validation Accuracy: 84.00
[EPOCH: 10], Validation Loss:  0.45 | Validation Accuracy: 86.00
[EPOCH: 11], Validation Loss:  0.45 | Validation Accuracy: 86.00
[EPOCH: 12], Validation Loss:  0.45 | Validation Accuracy: 86.00
[EPOCH: 13], Validation Loss:  0.45 | Validation Accuracy: 85.00
[EPOCH: 14], Validation Loss:  0.45 | Validation Accuracy: 86.00
[EPOCH: 15], Validation Loss:  0.45 | Validation Accuracy: 85.00


In [15]:
test_loss, test_acc = evaluate(model,test_iter)
print("Test Loss: %5.2f | Test Accuracy: %5.2f" % (test_loss, test_acc))

Test Loss:  0.46 | Test Accuracy: 84.00


In [16]:
class BasicLSTM(nn.Module):
    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p = 0.2):
        super(BasicLSTM, self).__init__()
        print("Building LSTM")
        self.n_layers = n_layers
        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(embed_dim, self.hidden_dim, num_layers = self.n_layers, batch_first = True)
        self.out = nn.Linear(self.hidden_dim, n_classes)
        
    def forward(self, x):
        x = self.embed(x)
        h_0 = self._init_state(batch_size = x.size(0))
        c_0 = self._init_state(batch_size = x.size(0))
        
        x, _ = self.lstm(x, (h_0, c_0))
        h_t = x[:, -1, :]
        self.dropout(h_t)
        logit = torch.sigmoid(self.out(h_t))
        return logit
    
    def _init_state(self, batch_size = 1):
        weight = next(self.parameters()).data
        return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [17]:
model = BasicLSTM(n_layers = 1, hidden_dim = 256, n_vocab = vocab_size, embed_dim = 128, n_classes = n_classes, dropout_p = 0.5).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
for e in range(1, EPOCHS + 1):
    train(model, optimizer, train_iter)
    val_loss, val_accuracy = evaluate(model, val_iter)
    print("[EPOCH: %d], Validation Loss: %5.2f | Validation Accuracy: %5.2f" % (e, val_loss, val_accuracy))

Building LSTM
[EPOCH: 1], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 2], Validation Loss:  0.69 | Validation Accuracy: 49.00
[EPOCH: 3], Validation Loss:  0.70 | Validation Accuracy: 49.00
[EPOCH: 4], Validation Loss:  0.70 | Validation Accuracy: 51.00
[EPOCH: 5], Validation Loss:  0.69 | Validation Accuracy: 51.00
[EPOCH: 6], Validation Loss:  0.69 | Validation Accuracy: 52.00
[EPOCH: 7], Validation Loss:  0.70 | Validation Accuracy: 51.00
[EPOCH: 8], Validation Loss:  0.70 | Validation Accuracy: 51.00
[EPOCH: 9], Validation Loss:  0.70 | Validation Accuracy: 51.00
[EPOCH: 10], Validation Loss:  0.70 | Validation Accuracy: 53.00
[EPOCH: 11], Validation Loss:  0.62 | Validation Accuracy: 68.00
[EPOCH: 12], Validation Loss:  0.57 | Validation Accuracy: 72.00
[EPOCH: 13], Validation Loss:  0.51 | Validation Accuracy: 79.00
[EPOCH: 14], Validation Loss:  0.55 | Validation Accuracy: 74.00
[EPOCH: 15], Validation Loss:  0.49 | Validation Accuracy: 82.00


In [18]:
test_loss, test_acc = evaluate(model,test_iter)
print("Test Loss: %5.2f | Test Accuracy: %5.2f" % (test_loss, test_acc))

Test Loss:  0.50 | Test Accuracy: 80.00
