In [None]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global RNG so the random tensors and layer initialisations
# created later in the notebook are the same on every Restart & Run All.
torch.manual_seed(1)

In [None]:
# Affine layer mapping 5 input features to 3 outputs.
lin = nn.Linear(5, 3)
# Two random samples with 5 features each. A plain tensor is used directly:
# the autograd.Variable wrapper is deprecated and no longer needed.
data = torch.randn(2, 5)
print(lin(data))

In [None]:
# Random 2x2 input; a plain tensor replaces the deprecated Variable wrapper.
data = torch.randn(2, 2)
print(data)
# ReLU zeroes the negative entries and keeps the non-negative ones.
print(F.relu(data))

In [None]:
# Five random scores for the softmax cells that follow; a plain tensor
# replaces the deprecated Variable wrapper.
data = torch.randn(5)

In [None]:
# Show the raw 5-element tensor that the softmax cells below operate on.
data

In [None]:
# Softmax along the only axis rescales the scores into a probability vector.
print(torch.softmax(data, dim=0))

In [None]:
# Sanity check: the softmax outputs should add up to 1.
print(torch.sum(F.softmax(data, dim=0)))

In [None]:
# Log of the softmax probabilities, computed in one call.
print(torch.log_softmax(data, dim=0))

## Logistic Regression Bag-of-Words classifier

In [None]:
# Toy training corpus: each entry pairs a whitespace-tokenised sentence with
# its language label.
data = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

# Held-out sentences in the same (tokens, label) format.
test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

In [None]:
# Inspect the training examples: a list of (token list, label) pairs.
data

In [None]:
# Assign every distinct word a consecutive integer id in order of first
# appearance. Both splits are scanned, so test-only words get an id too.
word_to_idx = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts when the word is new; the current dict size
        # is then the next free id.
        word_to_idx.setdefault(word, len(word_to_idx))
print(word_to_idx)

In [None]:
# Number of output classes (SPANISH and ENGLISH).
NUM_LABELS = 2
# One input feature per distinct word in the vocabulary.
VOCAB_SIZE = len(word_to_idx)

In [None]:
class BoWClassifier(nn.Module):
    """Logistic-regression classifier over bag-of-words count vectors.

    A single affine layer turns a vocabulary-sized count vector into one
    score per label; the scores are normalised with log-softmax.
    """

    def __init__(self, num_labels, vocab_size):
        """Create the affine layer.

        Args:
            num_labels: Number of output classes.
            vocab_size: Length of the bag-of-words input vector.
        """
        super().__init__()
        self.linear = nn.Linear(in_features=vocab_size, out_features=num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels.

        Args:
            bow_vec: Input count vectors. The callers in this notebook pass
                shape ``(1, vocab_size)``; log-softmax is taken over dim 1.
        """
        scores = self.linear(bow_vec)
        return F.log_softmax(scores, dim=1)

In [None]:
def make_bow_vector(sentence, word_to_idx):
    """Encode a tokenised sentence as a row vector of word counts.

    Args:
        sentence: Iterable of tokens. Every token must be a key of
            ``word_to_idx``; an unknown token raises ``KeyError``.
        word_to_idx: Mapping from token to its column index.

    Returns:
        Tensor of shape ``(1, len(word_to_idx))`` whose column ``i`` holds
        the number of occurrences of the word with index ``i``.
    """
    counts = torch.zeros(len(word_to_idx))
    for column in (word_to_idx[token] for token in sentence):
        counts[column] += 1
    # Add a leading dimension of size one so the result is a single-row batch.
    return counts.view(1, -1)

In [None]:
def make_target(label, label_to_idx):
    """Wrap the integer class id of ``label`` in a one-element LongTensor.

    Args:
        label: Label name; must be a key of ``label_to_idx``.
        label_to_idx: Mapping from label name to integer class id.

    Returns:
        ``torch.LongTensor`` of shape ``(1,)`` holding the class id.
    """
    class_id = label_to_idx[label]
    return torch.LongTensor([class_id])

In [None]:
# Instantiate the classifier: one input per vocabulary word, one output per label.
model = BoWClassifier(num_labels=NUM_LABELS, vocab_size=VOCAB_SIZE)

In [None]:
# Print every learnable parameter of the model as it stands at this point
# in the notebook (before the training cell has run).
for param in model.parameters():
    print(param)

In [None]:
# Take the first training example to try the model on a single input.
sample = data[0]

In [None]:
# Show the selected (tokens, label) example.
sample

In [None]:
# Encode the sample sentence as word counts and run one forward pass.
# The tensor goes to the model directly; the autograd.Variable wrapper is
# deprecated and no longer needed.
bow_vector = make_bow_vector(sample[0], word_to_idx)
log_probs = model(bow_vector)
print(log_probs)

In [None]:
# Inspect the count vector built for the sample sentence.
bow_vector

In [None]:
# Integer class id for each language label, as consumed by make_target.
label_to_ix = dict(SPANISH=0, ENGLISH=1)

In [None]:
# Score the held-out sentences with the model as it stands before the
# training cell. Inference needs no gradients, so run it under no_grad() to
# avoid building an autograd graph; tensors are passed directly because the
# autograd.Variable wrapper is deprecated.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_idx)
        log_probs = model(bow_vec)
        print(log_probs)

In [None]:
# The linear layer's weight is the model's first parameter; show its column
# for the word "creo", i.e. that word's weight for each label.
print(model.linear.weight[:, word_to_idx["creo"]])

In [None]:
# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Negative log-likelihood loss; it matches the log-probabilities that
# BoWClassifier.forward returns.
loss_function = nn.NLLLoss()

In [None]:
# Train for 100 passes over the training data, updating the weights after
# every single sentence.
for epoch in range(100):
    for instance, label in data:
        # Gradients accumulate across backward() calls, so clear them first.
        model.zero_grad()

        # Build the input and target as plain tensors; the autograd.Variable
        # wrapper is deprecated and no longer needed.
        bow_vec = make_bow_vector(instance, word_to_idx)
        target = make_target(label, label_to_ix)

        # Forward pass: log-probabilities over the labels.
        log_probs = model(bow_vec)

        # Compute the loss, backpropagate, and take one optimizer step.
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

In [None]:
# Score the held-out sentences again, now with the trained weights.
# Inference needs no gradients, so run it under no_grad() to avoid building
# an autograd graph; tensors are passed directly because the
# autograd.Variable wrapper is deprecated.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_idx)
        log_probs = model(bow_vec)
        print(log_probs)

In [None]:
# Show the weight column for "creo" again, to compare against the values
# printed before the training cell.
print(model.linear.weight[:, word_to_idx["creo"]])