In [44]:
data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
        ("Give it to me".split(), "ENGLISH"),
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

test_data = [("Yo creo que si".split(), "SPANISH"),
             ("it is lost on me".split(), "ENGLISH")]

In [55]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [56]:
def word2index(sentences):
    word_dict = {}
    for sentence in sentences:
        for word in sentence:
            try:
                word_dict[word]
            except:
                if word_dict.keys():
                    word_dict[word] = max(word_dict.values()) + 1
                else: 
                    word_dict[word] = 0
    return word_dict
                
sentences_list = [a[0] for a in data]
word_dict = word2index(sentences_list)
print(word_dict)

{'en': 3, 'No': 9, 'buena': 14, 'it': 7, 'at': 22, 'sea': 12, 'cafeteria': 5, 'la': 4, 'to': 8, 'creo': 10, 'is': 16, 'a': 18, 'good': 19, 'get': 20, 'idea': 15, 'que': 11, 'not': 17, 'me': 0, 'gusta': 1, 'lost': 21, 'Give': 6, 'una': 13, 'comer': 2}


In [57]:
import numpy as np

In [58]:
def sentence2vec(sentence, word_dict):
    vect = np.zeros(len(word_dict.values()))
    for word in sentence:
        try:
            vect[word_dict[word]] += 1
        except:
            pass
    return vect

print(sentence2vec("lol ptdr c hyper grand lol".split(" "), word_dict))

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]


In [59]:
def language2index(language_list):
    language_dict = {}
    for language in language_list:
        try:
            language_dict[language]
        except:
            if language_dict.values():
                language_dict[language] = max(language_dict.values()) + 1
            else:
                language_dict[language] = 0
    return language_dict

language_list = [a[1] for a in data]
language_dict = language2index(language_list)
print(language_dict)

{'ENGLISH': 1, 'SPANISH': 0}


In [90]:
def language2vec(language, language_dict):
    vect = np.zeros(len(language_dict.values()))
    vect[language_dict[language]] = 1
    return vect

print(language2vec("ENGLISH", language_dict))

[ 0.  1.]


In [84]:
class NN(nn.Module):
    def __init__(self, input_size, output_size):
        super(NN, self).__init__()
        
        self.lin = nn.Linear(input_size, output_size)
        self.softmax = nn.LogSoftmax()
    
    def forward(self, input):
        output = self.lin(input)
        output = self.softmax(output)
        return output

input_size, output_size = len(word_dict.values()), 2
model = NN(input_size, output_size)

In [88]:
criterion = nn.NLLLoss()
learning_rate = 0.005
optimizer = optim.SGD(model.parameters(), lr = learning_rate)
n_epochs = 100

for n in range(n_epochs):
    for sentence, language in data:
        optimizer.zero_grad()
        input = Variable(torch.from_numpy(sentence2vec(sentence, word_dict)).float().view(1, -1))
        target = Variable(torch.from_numpy(language2vec(language, language_dict)).long())

        output = model(input).view(-1)

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()



    

In [89]:
for sentence, language in test_data:
    input = Variable(torch.from_numpy(sentence2vec(sentence, word_dict)).float().view(1, -1))
    output = np.exp(model(input).data.numpy())
    print(output)
    print(language)
    print(sentence)

[[ 0.2721377  0.7278623]]
SPANISH
['Yo', 'creo', 'que', 'si']
[[ 0.8569876  0.1430124]]
ENGLISH
['it', 'is', 'lost', 'on', 'me']
