In [1]:
from collections import defaultdict
import time
import random
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable

In [2]:
class BoW(torch.nn.Module):
    def __init__(self, nwords, ntags):
        super(BoW, self).__init__()
        self.bias = Variable(torch.zeros(ntags),
                             requires_grad=True)
        self.embedding = nn.Embedding(nwords, ntags)
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        nn.init.xavier_uniform_(self.embedding.weight)

    def forward(self, words):
        emb = self.embedding(words)
        out = torch.sum(emb, dim=0) + self.bias # size(out) = N
        out = out.view(1, -1) # size(out) = 1 x N
        return out

In [3]:
class CBoW(torch.nn.Module):
    def __init__(self, nwords, ntags, emb_size):
        super(CBoW, self).__init__()
        self.embedding = nn.Embedding(nwords, emb_size)
        self.linear = nn.Linear(emb_size, ntags)
        
        nn.init.xavier_uniform_(self.embedding.weight)
        nn.init.xavier_uniform_(self.linear.weight)

    def forward(self, words):
        emb = self.embedding(words)
        emb_sum = torch.sum(emb, dim=0) # size(emb_sum) = emb_size
        emb_sum = emb_sum.view(1, -1) # size(emb_sum) = 1 x emb_size
        out = self.linear(emb_sum) # size(out) = 1 x ntags 
        return out

In [4]:
class DeepCBoW(torch.nn.Module):
    def __init__(self, nwords, ntags, nlayers, emb_size, hid_size):
        super(DeepCBoW, self).__init__()
        self.nlayers = nlayers

        self.embedding = nn.Embedding(nwords, emb_size)
        
        nn.init.xavier_uniform_(self.embedding.weight)

        # add nlayers number of layers
        self.linears = nn.ModuleList([
                nn.Linear(emb_size if i == 0 else hid_size, hid_size) \
                for i in range(nlayers)])
        for i in range(nlayers):
            nn.init.xavier_uniform_(self.linears[i].weight)

        self.output_layer = nn.Linear(hid_size, ntags)
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        nn.init.xavier_uniform_(self.output_layer.weight)

    def forward(self, words):
        emb = self.embedding(words)
        emb_sum = torch.sum(emb, dim=0) # size(emb_sum) = emb_size
        h = emb_sum.view(1, -1) # size(h) = 1 x emb_size
        for i in range(self.nlayers):
            h = torch.tanh(self.linears[i](h))
        out = self.output_layer(h)
        return out


# Data Preprocessing

In [5]:
# Functions to read in the corpus
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i["<unk>"]
def read_dataset(filename):
    with open(filename, "r") as f:
        for line in f:
            tag, words = line.lower().strip().split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

In [6]:
# Read in the data
train = list(read_dataset("data/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("data/test.txt"))
nwords = len(w2i)
ntags = len(t2i)

# Bag of worlds

In [16]:
def testing(model,test):
    # Perform testing
    test_correct = 0.0
    for words, tag in test:
        words = torch.tensor(words)
        scores = model(words)[0].detach().cpu().numpy()
        predict = np.argmax(scores)
        if predict == tag:
            test_correct += 1
    print("Test acc=%.4f" % (test_correct/len(test)))


def training(model,criterion,optimizer,train,test):
    for ITER in range(0,16):
        # Perform training
        random.shuffle(train)
        train_loss = 0.0
        start = time.time()
        for words, tag in train:
            tag = torch.tensor([tag])
            scores = model(torch.tensor(words))
            loss = criterion(scores, tag)
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss/len(train), time.time()-start))
        if ITER%5==0:
            testing(model,test)


In [17]:
# initialize the model
model = BoW(nwords, ntags)
optimizer = torch.optim.Adam(model.parameters())

In [18]:
training(model,nn.CrossEntropyLoss(),optimizer,train,dev)

iter 0: train loss/sent=1.4740, time=9.54s
Test acc=0.4027
iter 1: train loss/sent=1.1225, time=9.45s
iter 2: train loss/sent=0.9127, time=9.42s
iter 3: train loss/sent=0.7692, time=9.48s
iter 4: train loss/sent=0.6637, time=9.53s
iter 5: train loss/sent=0.5814, time=9.50s
Test acc=0.4109
iter 6: train loss/sent=0.5169, time=9.44s
iter 7: train loss/sent=0.4634, time=9.43s
iter 8: train loss/sent=0.4193, time=9.44s
iter 9: train loss/sent=0.3807, time=9.43s
iter 10: train loss/sent=0.3490, time=9.73s
Test acc=0.3964
iter 11: train loss/sent=0.3210, time=9.62s
iter 12: train loss/sent=0.2964, time=9.97s
iter 13: train loss/sent=0.2755, time=9.75s
iter 14: train loss/sent=0.2553, time=9.73s
iter 15: train loss/sent=0.2385, time=9.66s
Test acc=0.3864


# CBOW

In [19]:
# initialize the model
EMB_SIZE=20
model = CBoW(nwords, ntags, EMB_SIZE)

In [20]:
training(model,nn.CrossEntropyLoss(),torch.optim.Adam(model.parameters()),train,dev)

iter 0: train loss/sent=1.4213, time=31.05s
Test acc=0.4253
iter 1: train loss/sent=0.8960, time=34.74s
iter 2: train loss/sent=0.5288, time=34.72s
iter 3: train loss/sent=0.3330, time=34.18s
iter 4: train loss/sent=0.2265, time=34.39s
iter 5: train loss/sent=0.1597, time=37.12s
Test acc=0.3738
iter 6: train loss/sent=0.1194, time=34.40s
iter 7: train loss/sent=0.0876, time=36.41s
iter 8: train loss/sent=0.0648, time=35.29s
iter 9: train loss/sent=0.0496, time=37.03s
iter 10: train loss/sent=0.0371, time=34.62s
Test acc=0.3643
iter 11: train loss/sent=0.0274, time=35.08s
iter 12: train loss/sent=0.0229, time=34.77s
iter 13: train loss/sent=0.0163, time=35.57s
iter 14: train loss/sent=0.0140, time=36.26s
iter 15: train loss/sent=0.0112, time=35.42s
Test acc=0.3471


In [21]:
testing(model,train)

Test acc=0.9981


# Deep CBOW

In [None]:
# initialize the model
EMB_SIZE = 64
HID_SIZE = 64
NLAYERS = 2
model = DeepCBoW(nwords, ntags, NLAYERS, EMB_SIZE, HID_SIZE)

training(model,nn.CrossEntropyLoss(),torch.optim.Adam(model.parameters()),train,dev)

iter 0: train loss/sent=1.4338, time=93.87s
Test acc=0.4000
iter 1: train loss/sent=1.0111, time=104.94s
iter 2: train loss/sent=0.6412, time=104.80s
iter 3: train loss/sent=0.3960, time=104.48s
