# Intro to Deep Learning

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Seed PyTorch's global RNG so the random tensors and layer initialisations
# in the cells below come out the same on every fresh run.
torch.manual_seed(1)

<torch._C.Generator at 0x7f5f0361aa10>

In [2]:
# An affine layer x -> x A^T + b from R^5 to R^3; the weight A (3x5) and the
# bias b (3 entries) are its learnable parameters.
lin = nn.Linear(in_features=5, out_features=3)
# Two samples stacked as rows (2x5). The layer is applied to each row
# independently, so the printed result is 2x3.
data = torch.randn(2, 5)
print(lin(data))

tensor([[ 0.1755, -0.3268, -0.5069],
        [-0.6602,  0.2260,  0.1089]], grad_fn=<AddmmBackward0>)


In [3]:
# ReLU works element-wise: negative entries are clamped to 0, everything else
# passes through unchanged.
data = torch.randn(2, 2)
rectified = F.relu(data)
print(data)
print(rectified)

tensor([[-0.5404, -2.2102],
        [ 2.1130, -0.0040]])
tensor([[0.0000, 0.0000],
        [2.1130, 0.0000]])


In [4]:
# Softmax turns five arbitrary scores into a probability distribution (entries
# sum to 1); log_softmax gives the element-wise log of those probabilities.
data = torch.randn(5)
probs = F.softmax(data, dim=0)
log_of_probs = F.log_softmax(data, dim=0)
print(data)
print(probs)
print(probs.sum())
print(log_of_probs)
print(log_of_probs.sum())

tensor([ 1.3800, -1.3505,  0.3455,  0.5046,  1.8213])
tensor([0.2948, 0.0192, 0.1048, 0.1228, 0.4584])
tensor(1.)
tensor([-1.2214, -3.9519, -2.2560, -2.0969, -0.7801])
tensor(-10.3063)


# Declaring Model

In [5]:
# Toy corpus: every example is a (list of tokens, language label) pair.
data = [
  ("me gusta comer en la cafeteria".split(), "SPANISH"),
  ("Give it to me".split(), "ENGLISH"),
  ("No creo que sea una buena idea".split(), "SPANISH"),
  ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]

test_data = [
  ("Yo creo que si".split(), "SPANISH"),
  ("it is lost on me".split(), "ENGLISH"),
]

# Assign each distinct token the next free integer, in order of first
# appearance across the training and test sentences. setdefault leaves the
# index of an already-seen token untouched.
word_to_idx = {}
for sent, _ in data + test_data:
  for word in sent:
    word_to_idx.setdefault(word, len(word_to_idx))
print(word_to_idx)

VOCAB_SIZE = len(word_to_idx)
NUM_LABELS = 2

class BoWClassifier(nn.Module):
  """Bag-of-words classifier: one affine layer followed by log-softmax."""

  def __init__(self, num_labels, vocab_size):
    """Create the affine layer mapping vocab_size inputs to num_labels scores."""
    super().__init__()

    self.linear = nn.Linear(in_features=vocab_size, out_features=num_labels)

  def forward(self, bow_vec):
    """Return log-probabilities over the labels for bow_vec.

    The log-softmax is taken over dim 1, so bow_vec needs a leading batch
    dimension, e.g. the (1, vocab_size) tensor built by make_bow_vector.
    """
    scores = self.linear(bow_vec)
    return F.log_softmax(scores, dim=1)

def make_bow_vector(sentence, word_to_idx):
  """Build the bag-of-words count vector for a tokenised sentence.

  Args:
    sentence: iterable of tokens.
    word_to_idx: mapping from token to its position in the vector.

  Returns:
    A tensor of shape (1, len(word_to_idx)) whose entry i is the number of
    times the token with index i occurs in sentence.

  Raises:
    KeyError: if a token is missing from word_to_idx.
  """
  counts = torch.zeros(len(word_to_idx))
  for token in sentence:
    counts[word_to_idx[token]] += 1
  # Add a leading dimension of size 1 so the vector is a one-row batch.
  return counts.view(1, -1)

def make_target(label, label_to_idx):
  """Look up label in label_to_idx and wrap its index in a 1-element LongTensor."""
  class_index = label_to_idx[label]
  return torch.LongTensor([class_index])

model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model's only parameters are those of its linear layer: the weight matrix
# (NUM_LABELS x VOCAB_SIZE) followed by the bias vector.
for param in model.parameters():
  print(param)

# Sanity check: run the first training sentence through the untrained model.
# no_grad() because we only inspect the output and never backpropagate here.
with torch.no_grad():
  sample = data[0]
  bow_vector = make_bow_vector(sample[0], word_to_idx)
  log_probs = model(bow_vector)
print(log_probs)

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}
Parameter containing:
tensor([[ 0.1194,  0.0609, -0.1268,  0.1274,  0.1191,  0.1739, -0.1099, -0.0323,
         -0.0038,  0.0286, -0.1488, -0.1392,  0.1067, -0.0460,  0.0958,  0.0112,
          0.0644,  0.0431,  0.0713,  0.0972, -0.1816,  0.0987, -0.1379, -0.1480,
          0.0119, -0.0334],
        [ 0.1152, -0.1136, -0.1743,  0.1427, -0.0291,  0.1103,  0.0630, -0.1471,
          0.0394,  0.0471, -0.1313, -0.0931,  0.0669,  0.0351, -0.0834, -0.0594,
          0.1796, -0.0363,  0.1106,  0.0849, -0.1268, -0.1668,  0.1882,  0.0102,
          0.1344,  0.0406]], requires_grad=True)
Parameter containing:
tensor([0.0631, 0.1465], requires_grad=True)
tensor([[-0.5378, -0.8771]])


In [6]:
# Class index used as the training target for each language label.
label_to_idx = {
  "SPANISH": 0,
  "ENGLISH": 1,
}

In [8]:
# Baseline: log-probabilities for the held-out sentences before any training.
with torch.no_grad():
  for instance, label in test_data:
    bow_vec = make_bow_vector(instance, word_to_idx)
    log_probs = model(bow_vec)
    print(log_probs)

# Column of the weight matrix that belongs to the word "creo"
# (one entry per label).
creo_idx = word_to_idx["creo"]
print(model.linear.weight[:, creo_idx])

# The model already outputs log-probabilities, so negative log-likelihood is
# the matching loss.
loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
for epoch in range(100):
  for instance, label in data:
    # PyTorch accumulates gradients across backward() calls, so reset them
    # before handling each instance.
    model.zero_grad()

    # Input: the sentence's bag-of-words vector. Target: the label's class
    # index wrapped in a tensor (0 for SPANISH), which tells the loss which
    # entry of the log-probabilities belongs to the correct class.
    bow_vec = make_bow_vector(instance, word_to_idx)
    target = make_target(label, label_to_idx)

    # Forward pass and loss, then backpropagate and take one SGD step.
    loss = loss_fn(model(bow_vec), target)
    loss.backward()
    optimizer.step()

# Same held-out sentences again, now with the trained model.
with torch.no_grad():
  for instance, label in test_data:
    bow_vec = make_bow_vector(instance, word_to_idx)
    log_probs = model(bow_vec)
    print(log_probs)

# For "creo" the weight for Spanish goes up and the one for English goes down!
print(model.linear.weight[:, creo_idx])

tensor([[-0.9297, -0.5020]])
tensor([[-0.6388, -0.7506]])
tensor([-0.1488, -0.1313], grad_fn=<SelectBackward0>)
tensor([[-0.2093, -1.6669]])
tensor([[-2.5330, -0.0828]])
tensor([ 0.2803, -0.5605], grad_fn=<SelectBackward0>)
