In [20]:
import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
from torch import nn, optim

In [21]:
# Tiny labelled corpus. Each example is a (tokens, language) pair, where the
# tokens are the lower-cased, whitespace-split words of the sentence.
training_data = [
    ("Veinte paginas".lower().split(), "Spanish"),
    ("I will visit the library".lower().split(), "English"),
    ("I am reading a book".lower().split(), "English"),
    ("This is my favourite chapter".lower().split(), "English"),
    ("Estoy en la biblioteca".lower().split(), "Spanish"),
    ("Tengo un libro".lower().split(), "Spanish"),
]

test_data = [
    ("Estoy leyendo".lower().split(), "Spanish"),
    ("This is not my favourite book".lower().split(), "English"),
]

# Vocabulary: map every distinct token to an integer id, assigned in order of
# first appearance. The test sentences are included on purpose so that every
# test token has a slot in the bag-of-words vector.
word_dict = {}
for words, language in training_data + test_data:
    for word in words:
        # A new token gets the next free id (the current vocabulary size);
        # a token that is already known keeps its existing id.
        word_dict.setdefault(word, len(word_dict))
print(word_dict)



{'the': 5, 'libro': 22, 'a': 9, 'estoy': 16, 'am': 7, 'paginas': 1, 'chapter': 15, 'library': 6, 'la': 18, 'un': 21, 'favourite': 14, 'reading': 8, 'will': 3, 'i': 2, 'leyendo': 23, 'en': 17, 'not': 24, 'visit': 4, 'book': 10, 'is': 12, 'biblioteca': 19, 'this': 11, 'my': 13, 'tengo': 20, 'veinte': 0}


In [22]:
# Integer class id for each language; these ids are the training targets.
label_index = {"Spanish": 0, "English": 1}

# Model dimensions: one output per language, one input feature per vocabulary word.
languages = len(label_index)
corpus_size = len(word_dict)

In [23]:
class BagofWordsClassifier(nn.Module):
    """Bag-of-words text classifier: a single linear layer followed by log-softmax.

    Args:
        languages: Number of output classes (size of the linear layer's output).
        corpus_size: Vocabulary size, i.e. the length of each bag-of-words
            input vector (size of the linear layer's input).
    """

    def __init__(self, languages, corpus_size):
        super().__init__()
        self.linear = nn.Linear(corpus_size, languages)

    def forward(self, bow_vec):
        """Return per-class log-probabilities for a bag-of-words input.

        Args:
            bow_vec: Float tensor whose last dimension has length
                ``corpus_size``, e.g. shape ``(1, corpus_size)`` as produced by
                ``make_bow_vector`` or an unbatched ``(corpus_size,)`` vector.

        Returns:
            Tensor with the same leading dimensions as ``bow_vec`` and a last
            dimension of length ``languages``, holding log-probabilities.
        """
        # nn.Linear maps the last dimension to the class scores, so normalise
        # over the last axis. For 2-D input this is the same axis as dim=1, but
        # unlike dim=1 it also works for an unbatched 1-D vector.
        return F.log_softmax(self.linear(bow_vec), dim=-1)

In [24]:
def make_bow_vector(sentence, word_index):
    """Convert a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: Iterable of tokens (strings).
        word_index: Mapping from token to its column in the vector. The vector
            length is ``len(word_index)``, so the ids are assumed to be the
            contiguous integers ``0 .. len(word_index) - 1``.

    Returns:
        Float tensor of shape ``(1, len(word_index))`` where entry ``j`` is the
        number of times the token with id ``j`` occurs in ``sentence``.

    Raises:
        KeyError: If a token in ``sentence`` is not a key of ``word_index``.
    """
    # Use the mapping that was passed in (rather than notebook globals) for
    # both the vector size and the lookups, so the function is self-contained.
    word_vec = torch.zeros(len(word_index))
    for word in sentence:
        word_vec[word_index[word]] += 1
    # Add a leading batch dimension of size 1.
    return word_vec.view(1, -1)

def make_target(label, label_index):
    """Wrap the class id of ``label`` in a one-element integer tensor.

    Args:
        label: Language name; must be a key of ``label_index``.
        label_index: Mapping from language name to integer class id.

    Returns:
        ``torch.LongTensor`` of shape ``(1,)`` holding ``label_index[label]``.
    """
    class_id = label_index[label]
    return torch.LongTensor([class_id])

In [25]:
# Seed PyTorch's RNG before building the model: nn.Linear starts from random
# weights, so without a fixed seed the losses, probabilities and parameters
# printed below change on every fresh kernel run.
torch.manual_seed(0)
model = BagofWordsClassifier(languages, corpus_size)

In [27]:
# Plain SGD over all model parameters with a fixed learning rate of 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)
# The model's forward pass already returns log-probabilities (log_softmax),
# so negative log-likelihood is the matching loss.
loss_function = nn.NLLLoss()

In [30]:
# Train for 100 epochs with per-example SGD: one parameter update for every
# training sentence.
for epoch in range(100):
    for sentence, label in training_data:
        # Encode the example; this does not depend on the gradient state.
        bow_vec = make_bow_vector(sentence, word_dict)
        target = make_target(label, label_index)

        # Clear the gradients accumulated for the previous example.
        model.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

    # Only report every 10th epoch.
    if epoch % 10 != 0:
        continue
    # NOTE: `loss` is the loss of the LAST training example of this epoch,
    # not an average over the whole epoch.
    print('Epoch: ', epoch + 1, ', Loss: ' + str(loss.item()))

Epoch:  1 , Loss: 0.013411426916718483
Epoch:  11 , Loss: 0.012176347896456718
Epoch:  21 , Loss: 0.011149131692945957
Epoch:  31 , Loss: 0.0102814557030797
Epoch:  41 , Loss: 0.009538905695080757
Epoch:  51 , Loss: 0.00889623910188675
Epoch:  61 , Loss: 0.00833461619913578
Epoch:  71 , Loss: 0.007839507423341274
Epoch:  81 , Loss: 0.007399887777864933
Epoch:  91 , Loss: 0.007006953936070204


In [78]:
def make_predictions(data):
    """Print the model's predicted class probabilities for one labelled example.

    Uses the notebook-level ``model``, ``word_dict`` and ``make_bow_vector``.

    Args:
        data: A ``(sentence, label)`` pair. ``sentence`` is a list of tokens
            that must all be keys of ``word_dict``; ``label`` is the true
            language name as a string (it is only printed, not scored).

    Returns:
        None. Prints the tokens, the true label followed by ``':'``, and a
        ``(1, n_classes)`` tensor of probabilities whose columns follow the
        class ids used for training (``label_index``).
    """
    # Inference only: no gradients are needed.
    with torch.no_grad():
        sentence = data[0]
        label = data[1]
        bow_vec = make_bow_vector(sentence, word_dict)
        log_probs = model(bow_vec)
        print(sentence)
        print(label + ':')
        # The model outputs log-probabilities; exponentiate with the native
        # torch op rather than a NumPy ufunc so the result stays a tensor
        # without relying on NumPy/torch interop (and is not tied to CPU).
        print(torch.exp(log_probs))
        
# Score each held-out sentence in turn (test_data holds exactly two examples).
for example in test_data:
    make_predictions(example)

['estoy', 'leyendo']
Spanish:
tensor([[0.8606, 0.1394]])
['this', 'is', 'not', 'my', 'favourite', 'book']
English:
tensor([[0.0074, 0.9926]])


In [74]:
def return_params(word):
    """Print the learned per-language weights for a single vocabulary word.

    Reads the notebook-level ``model`` and ``word_dict``. Despite the name,
    nothing is returned; the values are printed.

    Args:
        word: Token to inspect; must be a key of ``word_dict``.
    """
    column = word_dict[word]
    # Only 2-D parameters (weight matrices) are reported; 1-D parameters such
    # as the bias vector are skipped.
    weight_matrices = (p for p in model.parameters() if p.dim() == 2)
    for weights in weight_matrices:
        # Row 0 / row 1 are the Spanish / English output units, matching the
        # class ids in label_index.
        print(word, ':', sep='')
        print('Spanish Parameter = ', weights[0][column].item(), sep='')
        print('English Parameter = ', weights[1][column].item(), sep='')
        print('\n')
            
# Inspect one Spanish-only and one English-only word from the vocabulary.
for probe_word in ('estoy', 'book'):
    return_params(probe_word)

book:
Spanish Parameter = -0.49991941452026367
English Parameter = 0.4414403736591339


estoy:
Spanish Parameter = 0.43009454011917114
English Parameter = -0.5826961398124695




In [85]:
# Probe the model with a one-word sentence. "not" appears only in test_data,
# never in training_data, so its bag-of-words feature is always zero during
# training and this prediction rests on weights that training never adjusted.
new_sentence = (
    ["not"],    # tokens
    "English",  # true label
)
make_predictions(new_sentence)

['not']
English:
tensor([[0.7152, 0.2848]])
