In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F #non-linearity
import torch.optim as optim 

torch.manual_seed(1)

<torch._C.Generator at 0x1083440f0>

In [4]:
lin = nn.Linear(5,3)
print(lin)

Linear(in_features=5, out_features=3, bias=True)


In [6]:
data = torch.randn(2,5)
print(data)
print(lin(data))

tensor([[ 0.4107, -0.9880, -0.9081,  0.5423,  0.1103],
        [-2.2590,  0.6067, -0.1383,  0.8310, -0.2477]])
tensor([[-0.3879, -0.2109, -0.5201],
        [-1.3890,  0.0820,  1.2022]], grad_fn=<AddmmBackward>)


In [7]:
# tanh(x),σ(x),ReLU(x) -- most common non-linearities
data = torch.randn(2,2)
print(data)
print(F.relu(data))

tensor([[-0.8029,  0.2366],
        [ 0.2857,  0.6898]])
tensor([[0.0000, 0.2366],
        [0.2857, 0.6898]])


In [8]:
data = torch.randn(5)
print(data)


tensor([-0.6331,  0.8795, -0.6842,  0.4533,  0.2912])


In [10]:
print(F.softmax(data,dim=0))
print(F.softmax(data,dim=0).sum())
print(F.log_softmax(data,dim=0))

tensor([0.0835, 0.3791, 0.0794, 0.2475, 0.2105])
tensor(1.)
tensor([-2.4825, -0.9700, -2.5337, -1.3962, -1.5583])
tensor(-8.9407)


In [22]:
data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
        ("Give it to me".split(), "ENGLISH"),
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]
print(data)

[(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH'), (['Give', 'it', 'to', 'me'], 'ENGLISH'), (['No', 'creo', 'que', 'sea', 'una', 'buena', 'idea'], 'SPANISH'), (['No', 'it', 'is', 'not', 'a', 'good', 'idea', 'to', 'get', 'lost', 'at', 'sea'], 'ENGLISH')]


In [23]:
test_data = [("Yo creo que si".split(), "SPANISH"),
             ("it is lost on me".split(), "ENGLISH")]
print(test_data)

[(['Yo', 'creo', 'que', 'si'], 'SPANISH'), (['it', 'is', 'lost', 'on', 'me'], 'ENGLISH')]


In [62]:
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)

SPANISH
['me', 'gusta', 'comer', 'en', 'la', 'cafeteria']
ENGLISH
['Give', 'it', 'to', 'me']
SPANISH
['No', 'creo', 'que', 'sea', 'una', 'buena', 'idea']
ENGLISH
['No', 'it', 'is', 'not', 'a', 'good', 'idea', 'to', 'get', 'lost', 'at', 'sea']
SPANISH
['Yo', 'creo', 'que', 'si']
ENGLISH
['it', 'is', 'lost', 'on', 'me']
{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}


In [25]:
VOCAB_SIZE = len(word_to_ix)
print(VOCAB_SIZE)
NUM_LABELS = 2

26


In [77]:
class BoWClassifier(nn.Module):  # inheriting from nn.Module!

    def __init__(self, num_labels, vocab_size):
        # calls the init function of nn.Module.  Dont get confused by syntax,
        # just always do it in an nn.Module
        super(BoWClassifier, self).__init__()

        # Define the parameters that you will need.  In this case, we need A and b,
        # the parameters of the affine mapping.
        # Torch defines nn.Linear(), which provides the affine map.
        # Make sure you understand why the input dimension is vocab_size
        # and the output is num_labels!
        self.linear = nn.Linear(vocab_size, num_labels)

        # NOTE! The non-linearity log softmax does not have parameters! So we don't need
        # to worry about that here

    def forward(self, bow_vec):
        # Pass the input through the linear layer,
        # then pass that through log_softmax.
        # Many non-linearities and other functions are in torch.nn.functional
        return F.log_softmax(self.linear(bow_vec), dim=1)


def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
   # print(vec)
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)


def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])


model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# the model knows its parameters.  The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, which was done with the line
# self.linear = nn.Linear(...)
# Then through some Python magic from the PyTorch devs, your module
# (in this case, BoWClassifier) will store knowledge of the nn.Linear's parameters
for param in model.parameters():
    print(param)

# To run the model, pass in a BoW vector
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)


Parameter containing:
tensor([[-0.1721,  0.1112,  0.1503,  0.1489,  0.1107, -0.0308,  0.1314,  0.1535,
          0.1022, -0.1411, -0.0684,  0.1554,  0.1495,  0.1251, -0.0820, -0.0388,
         -0.1461, -0.1118,  0.1505, -0.1610, -0.0485,  0.0416, -0.1177,  0.0457,
         -0.0315,  0.0784],
        [-0.1505,  0.0271, -0.1788,  0.1217,  0.1905, -0.0700,  0.0290, -0.1010,
         -0.1255, -0.0993, -0.0934,  0.0372, -0.0225,  0.0138, -0.1503, -0.1232,
         -0.1698, -0.1410,  0.1642,  0.0729, -0.1870, -0.0327,  0.0157,  0.1048,
          0.1819,  0.0474]], requires_grad=True)
Parameter containing:
tensor([ 0.1382, -0.0698], requires_grad=True)
(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH')
tensor([[-0.4424, -1.0286]])


In [63]:
label_to_ix = {"SPANISH": 0, "ENGLISH":1}

In [82]:
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)


tensor([[-0.6546, -0.7332]])
tensor([[-0.4482, -1.0182]])


In [84]:
print(next(model.parameters())[:,word_to_ix["creo"]])

tensor([-0.0684, -0.0934], grad_fn=<SelectBackward>)


In [46]:
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(),lr = 0.1)

In [91]:
for epoch in range(100):
    for instance, label in data:
        model.zero_grad()
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)
        
        log_probs = model(bow_vec)
        
        loss = loss_function(log_probs,target)
        loss.backward()
        optimizer.step()
        
    with torch.no_grad():
        for instance, label in test_data:
            bow_vec = make_bow_vector(instance, word_to_ix)
            log_probs = model(bow_vec)
print(log_probs)
            
print(next(model.parameters())[:,word_to_ix["creo"]])    

tensor([[-0.4482, -1.0182]])
tensor([-0.0684, -0.0934], grad_fn=<SelectBackward>)


In [55]:
print(next(model.parameters())[:,word_to_ix["creo"]])

tensor([ 0.3846, -0.7007], grad_fn=<SelectBackward>)


tensor([[-0.0799, -2.5671]])
tensor([[-2.4450, -0.0907]])
tensor([ 0.2439, -0.5600], grad_fn=<SelectBackward>)
tensor([[-0.0620, -2.8119]])
tensor([[-2.8004, -0.0627]])
tensor([ 0.3045, -0.6206], grad_fn=<SelectBackward>)
