In [10]:
import torch
import torch.nn as nn

# Embedding

Embeddings take a tensor of indices of tokens, into a matrix of the embeddings of the tokens. 

In [11]:
vocab_size = 5
embedding_dim = 7

emb = torch.nn.Embedding(vocab_size, embedding_dim)

emb

Embedding(5, 7)

In [12]:
x = torch.tensor([0,1,2,3,4,4,4,4])
print(type(x))
print(x)

emb(x)

<class 'torch.Tensor'>
tensor([0, 1, 2, 3, 4, 4, 4, 4])


tensor([[ 1.1159, -1.6301,  2.0315,  1.1607, -0.7758,  0.5866,  2.8454],
        [ 0.2817,  1.0223,  0.4960,  1.5578,  1.1068,  2.1891,  0.9872],
        [ 1.4275, -0.4618, -0.0252, -0.9590,  0.4144,  1.0903,  0.2251],
        [ 0.4877, -1.1983, -0.2735, -1.0186, -0.5245,  0.0922,  1.7625],
        [-0.6296, -1.0099, -1.0579,  1.2301,  0.7080,  0.3863, -0.4827],
        [-0.6296, -1.0099, -1.0579,  1.2301,  0.7080,  0.3863, -0.4827],
        [-0.6296, -1.0099, -1.0579,  1.2301,  0.7080,  0.3863, -0.4827],
        [-0.6296, -1.0099, -1.0579,  1.2301,  0.7080,  0.3863, -0.4827]],
       grad_fn=<EmbeddingBackward0>)

In [13]:
emb(x).size()

torch.Size([8, 7])

In [32]:
class LSTMTagger(torch.nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        # self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        # self.hidden2tag = torch.nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [33]:
model = LSTMTagger(10, 20, 30, 40)

print(model)

LSTMTagger(
  (word_embeddings): Embedding(30, 10)
)


In [35]:
for param in model.parameters(): 
    print(param)
    print(param.size())

Parameter containing:
tensor([[-2.5813e-01, -6.4703e-01, -9.1433e-01, -8.1346e-01, -1.0241e+00,
         -2.8796e-01,  3.4297e-02,  8.0998e-01, -1.3215e+00,  1.1946e+00],
        [ 1.0358e+00, -5.4781e-01, -7.6452e-01,  1.6602e-01,  3.3481e-01,
         -5.9183e-01,  1.0333e+00, -8.0865e-01,  3.4254e-01,  1.3579e+00],
        [-2.8722e-01, -7.2576e-02, -5.3387e-01,  7.6659e-01,  5.0026e-01,
         -8.2063e-02, -3.2224e-02, -1.6364e+00, -3.3055e-02, -8.8303e-01],
        [-1.1859e+00, -2.4185e-01,  4.2906e-01,  2.7880e-01,  1.9719e-02,
          1.0389e+00,  1.1196e+00,  1.0873e+00, -7.1420e-01,  4.9449e-01],
        [ 1.5089e+00,  1.9056e+00,  6.8070e-01,  1.7093e-01, -6.0116e-01,
         -1.4972e+00,  4.3371e-01, -3.8173e-01, -5.8957e-01,  1.0867e+00],
        [ 2.8598e-02,  4.2503e-01,  8.3138e-02,  1.5146e-02, -1.5358e+00,
          3.4631e-01, -1.1374e+00,  8.8088e-01, -7.5425e-02, -2.1819e+00],
        [ 9.7590e-01, -8.5094e-01,  1.4902e+00,  9.2025e-01, -5.9858e-01,
          

# Loss

## Example of target with class indices

In [59]:

loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)


In [60]:
input.size()

torch.Size([3, 5])

In [61]:
target.size()

torch.Size([3])

In [62]:
input

tensor([[-0.3556, -1.0022, -0.7583, -0.5866, -1.5448],
        [ 2.2494,  1.0764,  1.2176, -0.5809, -0.1928],
        [ 0.9878, -0.0733,  1.5109,  0.4811, -0.4841]], requires_grad=True)

In [63]:
input.grad

In [64]:
target

tensor([2, 1, 1])

In [65]:
output.backward()

In [68]:
input.grad

tensor([[ 0.1013,  0.0531, -0.2656,  0.0804,  0.0308],
        [ 0.1840, -0.2764,  0.0656,  0.0109,  0.0160],
        [ 0.0862, -0.3035,  0.1455,  0.0520,  0.0198]])

## Example of target with class probabilities

In [77]:
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
output = loss(input, target)

In [78]:
input.size()

torch.Size([3, 5])

In [79]:
target.size()

torch.Size([3, 5])

In [80]:
input

tensor([[ 0.2569,  2.2694,  0.4131, -1.1755, -0.7490],
        [-0.9746, -1.0044, -0.7197,  0.2863,  0.6351],
        [-0.4826, -1.0022,  0.1887,  0.0504, -0.1648]], requires_grad=True)

In [81]:
input.grad

In [82]:
target

tensor([[0.1581, 0.1017, 0.0918, 0.3352, 0.3132],
        [0.2083, 0.5078, 0.1210, 0.0648, 0.0981],
        [0.0972, 0.0098, 0.2173, 0.0755, 0.6001]])

In [83]:
output.backward()

In [84]:
input.grad

tensor([[-0.0202,  0.2093,  0.0074, -0.1040, -0.0925],
        [-0.0412, -0.1418, -0.0039,  0.0782,  0.1087],
        [ 0.0179,  0.0266,  0.0259,  0.0605, -0.1310]])

In [50]:
output.grad

  output.grad


# Enum

In [16]:
import torch

# Your 2D tensor
tensor_2d = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Get the last row
last_row = tensor_2d[-1]

print(last_row)

tensor([7, 8, 9])


In [19]:
h_0 = torch.zeros(256)

h_0.size()

torch.Size([256])

In [20]:
h_0 = h_0.unsqueeze(0).unsqueeze(0)

h_0.size()

torch.Size([1, 1, 256])

# End