# BOW and CBOW Sentiment

Not totally sure which of these are really BOW and CBOW or some mixture between the two...

Also, this is from the [CMU](http://phontron.com/class/nn4nlp2017/index.html) class (if that wasn't apparent from the name)


In [1]:
import torch
import torch.nn.functional as F
from collections import defaultdict
import random
import numpy as np

In [2]:
# Functions to read in the corpus
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i["<unk>"]
def read_dataset(filename):
    with open(filename, "r") as f:
        for line in f:
            tag, words = line.lower().strip().split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

In [3]:
# Read in the data
train = list(read_dataset("data/classes/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("data/classes/test.txt"))
nwords = len(w2i)
ntags = len(t2i)

In [4]:
train_sents = open("data/classes/train.txt").readlines()
print len(train_sents)
train_sents[0]

8544


"3 ||| The Rock is destined to be the 21st Century 's new `` Conan '' and that he 's going to make a splash even greater than Arnold Schwarzenegger , Jean-Claud Van Damme or Steven Segal .\n"

In [15]:
class CBOW(torch.nn.Module):
    def __init__(self):
        super(CBOW, self).__init__()       
        self.embeddings = torch.nn.Embedding(len(w2i), len(t2i))
        self.bias = torch.nn.Parameter(torch.zeros(len(t2i)).view(1,-1))

    def forward(self, inputs):
        embeds = self.embeddings(inputs)
        return F.log_softmax(embeds.sum(dim=0).view(1,-1) + self.bias, dim=1)
    
class CBOW2(torch.nn.Module):
    def __init__(self, hidden_dim):
        super(CBOW2, self).__init__()       
        self.embeddings = torch.nn.Embedding(len(w2i), hidden_dim)
        self.linear = torch.nn.Linear(hidden_dim, len(t2i))

    def forward(self, inputs):
        embeds = self.embeddings(inputs).sum(dim=0).view(1,-1)
        return F.log_softmax(self.linear(embeds), dim=1)
    
class CBOW3(torch.nn.Module):
    def __init__(self, hidden_dim):
        super(CBOW3, self).__init__()       
        self.embeddings = torch.nn.Embedding(len(w2i), hidden_dim)
        self.linear = torch.nn.Linear(hidden_dim, len(t2i))
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).sum(dim=0).view(1,-1)
        out = self.dropout(self.linear(embeds))
        return F.log_softmax(out, dim=1)
    
class BOW(torch.nn.Module):
    def __init__(self, embedding_dim):
        super(BOW, self).__init__()       
        self.embeddings = torch.nn.Embedding(len(w2i), len(t2i))

    def forward(self, inputs):
        embeds = self.embeddings(inputs)
        return F.log_softmax(embeds, dim=1).sum(dim=0).view(1,-1)

In [41]:
model = CBOW()
model.train()
criterion = torch.nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)
for t in range(40):
    
    random.shuffle(train)
    train_loss = 0.0
    for words, tag in train:
        optimizer.zero_grad()
        x = torch.tensor([w for w in words])
        y_pred = model(x)

        loss = criterion(y_pred, torch.tensor([tag]))
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    print t, train_loss/len(train)

0 3.74153947093
1 2.45141218083
2 1.83268226158
3 1.41387938882
4 1.130836651
5 0.919660911346
6 0.776208007195
7 0.655590380669
8 0.56924832905
9 0.493897508459
10 0.437929901779
11 0.388300467004
12 0.354219600436
13 0.32413468585
14 0.296508759719
15 0.275947284637
16 0.256219537897
17 0.239081522487
18 0.225710657498
19 0.211817980222
20 0.202557847719
21 0.19092202372
22 0.182301518212
23 0.172945399923
24 0.168296459858
25 0.160233159597
26 0.153083664796
27 0.14817414706
28 0.143048783787
29 0.137557932096
30 0.133083568824
31 0.129573064616
32 0.125685564356
33 0.121678461507
34 0.117801034805
35 0.115255029134
36 0.112374842289
37 0.10900549379
38 0.106461809872
39 0.104057779038


In [42]:
model.eval()
with torch.no_grad():
    correct = 0
    for words, tag in dev:
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        correct += torch.argmax(y_pred, dim = 1).item() == tag

    print "dev", float(correct)/len(dev)
    correct = 0
    for words, tag in train:
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        correct += torch.argmax(y_pred, dim = 1).item() == tag

    print "Train", float(correct)/len(train)  

dev 0.328959276018
Train 0.992041198502


In [29]:
# model = CBOW3(50)
# model.train()
# criterion = torch.nn.CrossEntropyLoss(reduction="sum")
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
# for t in range(1):
    
#     random.shuffle(train)
#     train_loss = 0.0
#     for words, tag in train:
#         optimizer.zero_grad()
#         x = torch.tensor([w for w in words])
#         y_pred = model(x)

#         loss = criterion(y_pred, torch.tensor([tag]))
#         train_loss += loss.item()
#         loss.backward()
#         optimizer.step()

#     print t, train_loss/len(train)

0 0.98368786606


In [30]:
# model.eval()
# with torch.no_grad():
#     correct = 0
#     for words, tag in dev:
#         x = torch.tensor([w for w in words])
#         y_pred = model(x)
#         correct += torch.argmax(y_pred, dim = 1).item() == tag

#     print "dev", float(correct)/len(dev)
#     correct = 0
#     for words, tag in train:
#         x = torch.tensor([w for w in words])
#         y_pred = model(x)
#         correct += torch.argmax(y_pred, dim = 1).item() == tag

#     print "Train", float(correct)/len(train)  

dev 0.354751131222
Train 0.949672284644


In [9]:
class Sentiment(torch.nn.Module):
    def __init__(self):
        super(Sentiment, self).__init__()       
        self.embeddings = torch.nn.Embedding(len(w2i), 1)

    def forward(self, inputs):
        embeds = self.embeddings(inputs)
        # print "embeds", embeds.sum(dim=0)
        return torch.sigmoid(embeds.sum(dim=0))

In [10]:
model = Sentiment()
criterion = torch.nn.BCELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-0)

for t in range(40):
    
    random.shuffle(train)
    train_loss = 0.0
    for words, tag in train:
        if tag == 2:
            continue
        if tag < 2:
            tag = 0.0
        if tag > 2:
            tag = 1.0
        optimizer.zero_grad()
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        # print y_pred
        loss = criterion(y_pred, torch.tensor([tag]))
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    print t, train_loss/len(train)

0 2.41802108698
1 1.33709841283
2 0.835866116547
3 0.586966595206
4 0.435553118778
5 0.33201093033
6 0.250492178666
7 0.199820565287
8 0.149695967955
9 0.118060995762
10 0.0773955546789
11 0.0685972794444
12 0.0529615695594
13 0.0337114673106
14 0.0321699700949
15 0.0310849630382
16 0.0286231650666
17 0.0267585572528
18 0.0245588197573
19 0.0224945617789
20 0.0222473219775
21 0.0217625251784
22 0.0217446602817
23 0.0215785687158
24 0.0214353888665
25 0.021317885629
26 0.0213136909239
27 0.0212321412386
28 0.0212236477079
29 0.0211467029458
30 0.0210595684644
31 0.02102685447
32 0.020967209591
33 0.0209652713155
34 0.0209372561681
35 0.0208828903269
36 0.0208587476873
37 0.0208273177294
38 0.0208158793943
39 0.0207795743961


In [11]:
model.eval()
with torch.no_grad():
    correct = 0
    using = 0
    for words, tag in dev:
        if tag == 2:
            continue
        using += 1
        if tag < 2:
            tag = 0.0
        if tag > 2:
            tag = 1.0
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        correct += abs(tag - y_pred.item()) < 0.5

    print "dev", float(correct)/using
    correct = 0
    using = 0
    for words, tag in train:
        if tag == 2:
            continue
        using += 1
        if tag < 2:
            tag = 0.0
        if tag > 2:
            tag = 1.0
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        # print y_pred
        correct += abs(tag - y_pred.item()) < 0.5

    print "Train", float(correct)/using  

dev 0.783635365184
Train 0.999132947977


# CNNs

In [176]:
class CNN(torch.nn.Module):
    def __init__(self, vocab_size, window_size, embedding_dim, output_channels):
        super(CNN, self).__init__()
        
        assert window_size%2 == 1 #odd window sizes please
        
        self.embedding_dim = embedding_dim
        self.output_channels = output_channels
        
        self.embed = torch.nn.Embedding(vocab_size, embedding_dim)
        self.cnn = torch.nn.Conv1d(embedding_dim, output_channels, window_size, padding = (window_size - 1)/2) 
        self.linear = torch.nn.Linear(output_channels, 1)
        
    def forward(self, inputs):
        embedded = self.embed(inputs).t().view(1,self.embedding_dim, -1)
        convoluted = self.cnn(embedded)
        pooled = F.relu(F.max_pool1d(convoluted, len(inputs)))
        return torch.sigmoid(self.linear(pooled.view(self.output_channels)))
    
cnn = CNN(5,3,32,16)
input = torch.tensor([0,1,2,1,4]) 
# print input
cnn(input)

tensor([0.4653], grad_fn=<SigmoidBackward>)

In [186]:
model = CNN(len(w2i),3,16,16)

criterion = torch.nn.BCELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum = 0.1)

for t in range(40):
    
    random.shuffle(train)
    train_loss = 0.0
    for words, tag in train:
        if tag == 2:
            continue
        if tag < 2:
            tag = 0.0
        if tag > 2:
            tag = 1.0
        optimizer.zero_grad()
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        # print y_pred
        loss = criterion(y_pred, torch.tensor([tag]))
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    print t, train_loss/len(train)

0 0.559116127963
1 0.546992398338
2 0.537920873117
3 0.529935765009
4 0.522986482761
5 0.514049974196
6 0.506678677761
7 0.496691504355
8 0.484541071418
9 0.478142225802
10 0.47074982075
11 0.458209036435
12 0.449260221628
13 0.438598727762
14 0.427408823783
15 0.418008087933
16 0.409858000395
17 0.396625886976
18 0.38380466141
19 0.371819286575
20 0.357757684014
21 0.34230660512
22 0.333373975069
23 0.312854016985
24 0.296530620671
25 0.282677547839
26 0.266663719459
27 0.242970353538
28 0.228311647407
29 0.211016473039
30 0.195309464113
31 0.1568078539
32 0.141335186566
33 0.12904788398
34 0.107520385369
35 0.0945435691704
36 0.0705132258136
37 0.0588438163109
38 0.0405577773309
39 0.026601341458


In [188]:
with torch.no_grad():
    correct = 0
    using = 0
    for words, tag in dev:
        if tag == 2:
            continue
        using += 1
        if tag < 2:
            tag = 0.0
        if tag > 2:
            tag = 1.0
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        correct += abs(tag - y_pred.item()) < 0.5

    print "dev", float(correct)/using
    correct = 0
    using = 0
    for words, tag in train:
        if tag == 2:
            continue
        using += 1
        if tag < 2:
            tag = 0.0
        if tag > 2:
            tag = 1.0
        x = torch.tensor([w for w in words])
        y_pred = model(x)
        # print y_pred
        correct += abs(tag - y_pred.item()) < 0.5

    print "Train", float(correct)/using  

dev 0.668863261944
Train 0.996242774566
