In [1]:
import csv
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

num_classes = 5
num_epochs = 100
length = 50
embedding_dim = 50
feature_maps = 100
dropout_rate = 0.5
hidden_size = 100
batch_size = 50
lr = 1e-3
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
df = pd.read_csv('train.tsv', sep='\t')[:10000]
df = df.reindex(np.random.permutation(df.shape[0]))
df_train = df[:int(df.shape[0] * 0.8)]
df_test = df[int(df.shape[0] * 0.8):]

In [3]:
X_train = list()
vocab = dict({'<pad>': 0, '<unk>': 1})
for phrase in df_train["Phrase"]:
    indices = list()
    words = phrase.split(' ')
    for word in words:
        if word not in vocab:
            vocab[word] = len(vocab)
        indices.append(vocab[word])
    while len(indices) < length:
        indices.append(0)
    X_train.append(indices)
X_train = torch.tensor(X_train).to(device)

In [4]:
Y_train = torch.zeros((df_train.shape[0], num_classes)).to(device)
for idx, sentiment in enumerate(df_train["Sentiment"]):
    Y_train[idx, sentiment] = 1

In [5]:
X_test = list()
for phrase in df_test["Phrase"]:
    indices = list()
    words = phrase.split(' ')
    for word in words:
        if word in vocab:
            indices.append(vocab[word])
        else:
            indices.append(1)
    while len(indices) < length:
        indices.append(0)
    X_test.append(indices)
X_test = torch.tensor(X_test).to(device)

In [6]:
Y_test = torch.zeros((df_test.shape[0], num_classes)).to(device)
for idx, sentiment in enumerate(df_test["Sentiment"]):
    Y_test[idx, sentiment] = 1

In [7]:
glove = pd.read_csv("glove.6B.50d.txt", sep=' ', header=None, quoting=csv.QUOTE_NONE)
vectors = torch.zeros((len(vocab), embedding_dim)).to(device)
torch.nn.init.uniform_(vectors)
vectors[0] = torch.zeros_like(vectors[0])
for row in glove.iterrows():
    if row[1][0] in vocab:
        vectors[vocab[row[1][0]]] = torch.tensor(np.array(row[1][1:]).astype(np.float32))

In [8]:
class CNN(nn.Module):
    def __init__(self, num_embeddings, vectors=None):
        super().__init__()

        if vectors is None:
            self.embed = nn.Embedding(num_embeddings, embedding_dim)
        else:
            self.embed = nn.Embedding(num_embeddings, embedding_dim).from_pretrained(vectors)

        self.conv1 = nn.Conv2d(1, feature_maps, (3, embedding_dim))
        self.conv2 = nn.Conv2d(1, feature_maps, (4, embedding_dim))
        self.conv3 = nn.Conv2d(1, feature_maps, (5, embedding_dim))
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(feature_maps * 3, num_classes)

    def forward(self, x):
        x = self.embed(x)
        x = torch.unsqueeze(x, dim=1)

        x1 = torch.squeeze(F.relu(self.conv1(x)))
        x1 = torch.squeeze(F.max_pool1d(x1, x1.size(2)))

        x2 = torch.squeeze(F.relu(self.conv2(x)))
        x2 = torch.squeeze(F.max_pool1d(x2, x2.size(2)))

        x3 = torch.squeeze(F.relu(self.conv3(x)))
        x3 = torch.squeeze(F.max_pool1d(x3, x3.size(2)))

        out = torch.cat([x1, x2, x3], dim=1)
        out = self.dropout(out)
        out = self.fc(out)
        return out

In [9]:
cnn_random = CNN(len(vocab)).to(device)
optimizer = torch.optim.Adam(cnn_random.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [10]:
cnn_random.train()
for epoch in range(num_epochs):
    pred = cnn_random(X_train)
    loss = criterion(pred, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [11]:
cnn_random.eval()
pred = cnn_random(X_test).detach().cpu().numpy()
print("Accuracy of CNN with random initialization: {}".format(np.mean(np.argmax(pred, axis=1) == np.array(df_test['Sentiment']))))

Accuracy of CNN with random initialization: 0.651


In [12]:
cnn_glove = CNN(len(vocab), vectors).to(device)
optimizer = torch.optim.Adam(cnn_glove.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [13]:
cnn_glove.train()
for epoch in range(num_epochs):
    for batch in range(df_train.shape[0] // batch_size):
        pred = cnn_glove(X_train[batch * batch_size:(batch + 1) * batch_size])
        loss = criterion(pred, Y_train[batch * batch_size:(batch + 1) * batch_size])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [14]:
cnn_glove.eval()
pred = cnn_glove(X_test).detach().cpu().numpy()
print("Accuracy of CNN with GloVe: {}".format(np.mean(np.argmax(pred, axis=1) == np.array(df_test['Sentiment']))))

Accuracy of CNN with GloVe: 0.6865


In [15]:
class RNN(nn.Module):
    def __init__(self, num_embeddings, vectors=None):
        super().__init__()

        if vectors is None:
            self.embed = nn.Embedding(num_embeddings, embedding_dim)
        else:
            self.embed = nn.Embedding(num_embeddings, embedding_dim).from_pretrained(vectors)

        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(feature_maps, num_classes)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        c0 = torch.zeros(1, x.size(0), hidden_size).to(device)

        x = self.embed(x)
        out, _ = self.lstm(x, (h0, c0))
        out = self.dropout(out)
        out = self.fc(out[:, -1, :])
        return out

In [16]:
rnn_random = RNN(len(vocab)).to(device)
optimizer = torch.optim.Adam(rnn_random.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [17]:
rnn_random.train()
for epoch in range(num_epochs):
    pred = rnn_random(X_train)
    loss = criterion(pred, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [18]:
rnn_random.eval()
pred = rnn_random(X_test).detach().cpu().numpy()
print("Accuracy of RNN with random initialization: {}".format(np.mean(np.argmax(pred, axis=1) == np.array(df_test['Sentiment']))))

Accuracy of RNN with random initialization: 0.5835


In [19]:
rnn_glove = RNN(len(vocab), vectors).to(device)
optimizer = torch.optim.Adam(rnn_glove.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [20]:
rnn_glove.train()
for epoch in range(num_epochs):
    for batch in range(df_train.shape[0] // batch_size):
        pred = rnn_glove(X_train[batch * batch_size:(batch + 1) * batch_size])
        loss = criterion(pred, Y_train[batch * batch_size:(batch + 1) * batch_size])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [21]:
rnn_glove.eval()
pred = rnn_glove(X_test).detach().cpu().numpy()
print("Accuracy of RNN with GloVe: {}".format(np.mean(np.argmax(pred, axis=1) == np.array(df_test['Sentiment']))))

Accuracy of RNN with GloVe: 0.6715
