In [1]:
import torch
import torch.nn as nn

# Embedding

Embeddings take a tensor of indices of tokens, into a matrix of the embeddings of the tokens. 

In [2]:
vocab_size = 5
embedding_dim = 7

emb = torch.nn.Embedding(vocab_size, embedding_dim)

emb

Embedding(5, 7)

In [3]:
x = torch.tensor([0,1,2,3,4,4,4,4])
print(x.size())

embeddings = emb(x)
print(embeddings.size())
print(embeddings)

torch.Size([8])
torch.Size([8, 7])
tensor([[-1.1659,  1.0582,  0.9662, -0.7761, -0.4046,  0.4042, -0.3711],
        [-1.3935,  1.0801,  1.0189,  1.6781,  0.7495,  1.9403, -0.9764],
        [ 0.8480, -0.3351, -0.0892,  0.4605, -0.5739, -0.3665,  1.7589],
        [-1.6711,  0.8306,  0.9070, -0.0072,  0.1296,  1.2627, -1.4916],
        [-0.0062, -1.0555,  2.0760,  0.6944,  0.9913, -0.6511,  1.4319],
        [-0.0062, -1.0555,  2.0760,  0.6944,  0.9913, -0.6511,  1.4319],
        [-0.0062, -1.0555,  2.0760,  0.6944,  0.9913, -0.6511,  1.4319],
        [-0.0062, -1.0555,  2.0760,  0.6944,  0.9913, -0.6511,  1.4319]],
       grad_fn=<EmbeddingBackward0>)


In [4]:
# what if x is batched input? 
x_batched = torch.tensor([[0,1,2,2], [2,3,4,4]])
print(x_batched.size())

embeddings = emb(x_batched)
print(embeddings.size())
print(embeddings)

torch.Size([2, 4])
torch.Size([2, 4, 7])
tensor([[[-1.1659,  1.0582,  0.9662, -0.7761, -0.4046,  0.4042, -0.3711],
         [-1.3935,  1.0801,  1.0189,  1.6781,  0.7495,  1.9403, -0.9764],
         [ 0.8480, -0.3351, -0.0892,  0.4605, -0.5739, -0.3665,  1.7589],
         [ 0.8480, -0.3351, -0.0892,  0.4605, -0.5739, -0.3665,  1.7589]],

        [[ 0.8480, -0.3351, -0.0892,  0.4605, -0.5739, -0.3665,  1.7589],
         [-1.6711,  0.8306,  0.9070, -0.0072,  0.1296,  1.2627, -1.4916],
         [-0.0062, -1.0555,  2.0760,  0.6944,  0.9913, -0.6511,  1.4319],
         [-0.0062, -1.0555,  2.0760,  0.6944,  0.9913, -0.6511,  1.4319]]],
       grad_fn=<EmbeddingBackward0>)


In [5]:
class LSTMTagger(torch.nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        # self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        # self.hidden2tag = torch.nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [6]:
model = LSTMTagger(10, 20, 30, 40)

print(model)

LSTMTagger(
  (word_embeddings): Embedding(30, 10)
)


In [7]:
for param in model.parameters(): 
    print(param)
    print(param.size())

Parameter containing:
tensor([[ 3.8494e-01, -5.5391e-01,  9.9834e-01, -1.4209e+00,  1.4911e+00,
         -3.7122e-02,  1.8848e+00, -1.0468e-01, -3.7011e-01, -7.3973e-01],
        [-6.4030e-01, -1.3119e+00,  1.2074e+00,  1.8469e-01, -2.4438e-01,
          2.5076e-02,  2.3591e+00,  2.8905e-01, -1.9081e+00,  8.3450e-02],
        [-1.2986e+00, -1.1139e+00, -5.6324e-01,  1.0266e+00,  1.9043e+00,
         -2.9037e-01, -3.0238e-01, -1.2839e+00, -2.1847e+00,  8.3514e-01],
        [ 2.9673e-01,  9.4009e-01, -1.2846e+00,  1.9970e+00, -7.7268e-01,
         -3.9361e-02,  3.3113e-01, -2.2499e-01, -9.4550e-01,  1.6217e-01],
        [-7.7959e-01, -5.4716e-01, -2.3300e-01,  2.0358e+00, -9.7984e-01,
          6.7819e-01,  6.1051e-01,  8.7500e-01,  5.6072e-01, -6.7303e-01],
        [ 5.4724e-01, -1.9696e-01, -1.0326e-01, -3.1680e-01,  3.1085e-01,
          1.2024e+00, -3.7980e-01,  2.7570e-01, -1.6197e+00, -1.1814e-01],
        [-4.4478e-01,  8.1144e-02, -1.0417e-01, -5.5334e-01,  1.4114e-01,
          

# Loss

## Example of target with class indices

In [8]:

loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)


In [9]:
input.size()

torch.Size([3, 5])

In [10]:
target.size()

torch.Size([3])

In [11]:
input

tensor([[-0.4212, -0.4190,  0.1881,  0.6210,  0.1523],
        [ 1.2478, -2.2200, -0.7616,  1.1415,  0.3109],
        [ 0.0583,  0.8081,  0.4962,  0.6873,  0.8748]], requires_grad=True)

In [12]:
input.grad

In [13]:
target

tensor([3, 2, 4])

In [14]:
output.backward()

In [15]:
input.grad

tensor([[ 0.0394,  0.0395,  0.0725, -0.2215,  0.0700],
        [ 0.1357,  0.0042, -0.3151,  0.1220,  0.0532],
        [ 0.0379,  0.0801,  0.0587,  0.0710, -0.2477]])

## Example of target with class probabilities

In [16]:
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
output = loss(input, target)

In [17]:
input.size()

torch.Size([3, 5])

In [18]:
target.size()

torch.Size([3, 5])

In [19]:
input

tensor([[ 0.1829,  0.2004,  0.8537, -0.0622,  0.9630],
        [-0.7863, -1.9025, -0.9376,  0.8261,  1.0012],
        [ 1.8848,  1.7796, -1.5515,  0.5112,  0.0946]], requires_grad=True)

In [20]:
input.grad

In [21]:
target

tensor([[0.0520, 0.1592, 0.1407, 0.3805, 0.2676],
        [0.3378, 0.0565, 0.3831, 0.1195, 0.1031],
        [0.0207, 0.1292, 0.0734, 0.0547, 0.7221]])

In [22]:
output.backward()

In [23]:
input.grad

tensor([[ 0.0307, -0.0042,  0.0471, -0.0892,  0.0156],
        [-0.0873, -0.0105, -0.1060,  0.0871,  0.1168],
        [ 0.1348,  0.0845, -0.0199,  0.0176, -0.2171]])

In [24]:
output.grad

  output.grad


# tensor

In [25]:
import torch

# Your 2D tensor
tensor_2d = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Get the last row
last_row = tensor_2d[-1]

print(last_row)

tensor([7, 8, 9])


In [26]:
len(tensor_2d.size())

2

In [27]:
h_0 = torch.zeros(256)

h_0.size()

torch.Size([256])

In [28]:
h_0 = h_0.unsqueeze(0).unsqueeze(0)

h_0.size()

torch.Size([1, 1, 256])

# extract the last non-zero along an axis

In [29]:
def get_sample_tensor(non_zeros, zeros): 
    t = torch.arange(non_zeros)

    t0 = torch.zeros(zeros, dtype=t.dtype)

    result_tensor = torch.cat((t, t0)).view(8,4)
    return result_tensor

In [30]:
t1 = get_sample_tensor(20, 12)
t2 = get_sample_tensor(32, 0)
t3 = get_sample_tensor(16, 16)

t1.size()

torch.Size([8, 4])

In [31]:
rnn_output = torch.stack([t1, t2, t3], dim=0)
rnn_output.size()

torch.Size([3, 8, 4])

In [32]:
rnn_output

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15],
         [16, 17, 18, 19],
         [ 0,  0,  0,  0],
         [ 0,  0,  0,  0],
         [ 0,  0,  0,  0]],

        [[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23],
         [24, 25, 26, 27],
         [28, 29, 30, 31]],

        [[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15],
         [ 0,  0,  0,  0],
         [ 0,  0,  0,  0],
         [ 0,  0,  0,  0],
         [ 0,  0,  0,  0]]])

In [33]:
# Sample sequence lengths (L)
sequence_lengths = [5, 8, 4]

In [34]:
# Create a mask to indicate valid positions
mask = torch.arange(rnn_output.size(1)).unsqueeze(0) < torch.tensor(sequence_lengths).unsqueeze(1)
mask


tensor([[ True,  True,  True,  True,  True, False, False, False],
        [ True,  True,  True,  True,  True,  True,  True,  True],
        [ True,  True,  True,  True, False, False, False, False]])

In [35]:
# Find the last non-zero position along the sequence dimension
last_non_zero_positions = torch.max(mask * torch.arange(rnn_output.size(1)), dim=1).values
last_non_zero_positions

tensor([4, 7, 3])

In [36]:
mask * torch.arange(rnn_output.size(1))

tensor([[0, 1, 2, 3, 4, 0, 0, 0],
        [0, 1, 2, 3, 4, 5, 6, 7],
        [0, 1, 2, 3, 0, 0, 0, 0]])

In [37]:
# Extract the last non-zero tensor along the sequence dimension
final_outputs = rnn_output[torch.arange(rnn_output.size(0)), last_non_zero_positions]

print(final_outputs.shape)  # Size: (N, output_size)

torch.Size([3, 4])


In [38]:
# temp 
torch.stack([torch.tensor(1),torch.tensor(2)])

tensor([1, 2])

In [39]:
torch.tensor(1).size()

torch.Size([])

## broadcasting

In the case of comparing a tensor of size (1, 8) and a tensor of size (3, 1), the result will be a tensor of size (3, 8). This is again due to broadcasting rules.

When performing element-wise operations (such as comparison) between two tensors, PyTorch compares their dimensions element-wise, **starting from the rightmost dimension**. Broadcasting allows the tensors to be compatible for element-wise operations when:

1. The size of the dimensions matches.
2. One of the sizes is 1.  

Let's look at the example:

Tensor A: (1, 8)
Tensor B: (3, 1)  

In this case, PyTorch will broadcast the smaller tensor (Tensor B) along its singleton dimension to match the size of the corresponding dimension in the larger tensor (Tensor A). Broadcasting will stretch Tensor B to have the same size as Tensor A along the second dimension. As a result, you get a tensor of size (3, 8) when performing element-wise operations.

In [40]:
mask = torch.arange(rnn_output.size(1)).unsqueeze(0) < torch.tensor(sequence_lengths).unsqueeze(1)
mask

tensor([[ True,  True,  True,  True,  True, False, False, False],
        [ True,  True,  True,  True,  True,  True,  True,  True],
        [ True,  True,  True,  True, False, False, False, False]])

In [41]:
# temp
print(torch.arange(rnn_output.size(1)))
print(torch.arange(rnn_output.size(1)).size())
print(torch.arange(rnn_output.size(1)).unsqueeze(0))
print(torch.arange(rnn_output.size(1)).unsqueeze(0).size())

tensor([0, 1, 2, 3, 4, 5, 6, 7])
torch.Size([8])
tensor([[0, 1, 2, 3, 4, 5, 6, 7]])
torch.Size([1, 8])


In [42]:
# temp
print(torch.tensor(sequence_lengths))
print(torch.tensor(sequence_lengths).size())
print(torch.tensor(sequence_lengths).unsqueeze(1))
print(torch.tensor(sequence_lengths).unsqueeze(1).size())

tensor([5, 8, 4])
torch.Size([3])
tensor([[5],
        [8],
        [4]])
torch.Size([3, 1])


# Extract specific element along axis

In [43]:
# Create a tensor of size (3, 10)
tensor = torch.arange(30).view(3, 10)
tensor


tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [44]:
seq_lengths = torch.tensor([2,4,8])
seq_lengths

tensor([2, 4, 8])

In [45]:
indices = seq_lengths - 1
indices

tensor([1, 3, 7])

In [46]:
# Use fancy indexing to extract elements from each row
result = tensor[range(tensor.size(0)), indices]

print(result)


tensor([ 1, 13, 27])


In [47]:
result = tensor[[0,1,2], indices]
result

tensor([ 1, 13, 27])

In [48]:
# indices = (seq_lengths - 1).view(-1, 1).expand(len(seq_lengths), out_unpacked.size(2)).unsqueeze(0)
indices = (seq_lengths - 1).view(-1, 1).expand(len(seq_lengths))
indices

RuntimeError: expand(torch.LongTensor{[3, 1]}, size=[3]): the number of sizes provided (1) must be greater or equal to the number of dimensions in the tensor (2)

# pack_padded_sequence

In [None]:
# seq_lengths: tensor([43,  35,  30, 138])
# sorted_indices=tensor([3, 0, 1, 2]), unsorted_indices=tensor([1, 2, 3, 0])

# [138, 43, 35, 30]

# for the first 30 cells, all 4 tensor have it
# continue till the 35th cell, the first 3 tensors have it
# continue till the 43th cell, the first 2 tensors have it
# till the end, only the first 1 tensor have it

batch_sizes = [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [None]:
len([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4])

In [None]:
len([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3])

In [None]:
len([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2])

In [None]:
len([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
sum([138, 43, 35, 30])

In [None]:
torch.arange(8).size()

In [None]:
torch.arange(8)[None, :]

In [None]:
torch.arange(8)[None, :].size()

In [None]:
len([ 0.2054,  0.0020,  0.0486,  0.0508,  0.1717,  0.4226,  0.0641,  0.2693,
        -0.1672,  0.5834,  0.1972,  0.0074,  0.2665, -0.1019, -0.2332, -0.2746,
         0.2326,  0.2747,  0.1856,  0.2407,  0.4252,  0.2461,  0.2270,  0.4674,
        -0.3185, -0.3030, -0.1261,  0.0362, -0.0045,  0.0117,  0.1414,  0.1806,
         0.3911,  0.4545, -0.2965,  0.0517,  0.1228,  0.4499,  0.4384,  0.0195,
         0.1236, -0.1288,  0.0365,  0.0000])

# sort a counter 

In [None]:
tensor_str1 = """
(tensor([ 113, 1376,  519, 1320,   10,    7, 1811, 1338, 1148,    7, 4311,    3,
         113, 4312,   24, 1676, 6154,  821, 2084,   25,    0, 6155, 4312,    3,
        1148,    3,  112,    3, 1811,  951,  803, 3720, 1123, 2084,    0,    6,
           8,    2,    9]), tensor(1))
"""

In [None]:
tensor_str2 = """
(tensor([ 113, 1376,  519, 1320,   10,    7, 1811, 1338, 1148,    7, 4311,    3,
         113, 4312,   24, 1676, 6154,  821, 2084,   25,    0, 6155, 4312,    3,
        1148,    3,  112,    3, 1811,  951,  803, 3720, 1123, 2084,    0,    6,
           8,    2,    9]), tensor(1))
"""

In [22]:
print(tensor_str1 == tensor_str2)

True


# dimension

In [62]:
t = torch.tensor([[1,6,3,8], [5,2,7,4]])
print(t.size())
t

torch.Size([2, 4])


tensor([[1, 6, 3, 8],
        [5, 2, 7, 4]])

In [63]:
t = t.unsqueeze(-1)
print(t.size())
t

torch.Size([2, 4, 1])


tensor([[[1],
         [6],
         [3],
         [8]],

        [[5],
         [2],
         [7],
         [4]]])

In [64]:
t = t.expand(-1, -1, 3)
print(t.size())
t

torch.Size([2, 4, 3])


tensor([[[1, 1, 1],
         [6, 6, 6],
         [3, 3, 3],
         [8, 8, 8]],

        [[5, 5, 5],
         [2, 2, 2],
         [7, 7, 7],
         [4, 4, 4]]])

In [71]:
s = torch.argmax(t, dim=2)
print(s.size())
s

torch.Size([2, 4])


tensor([[0, 0, 0, 0],
        [0, 0, 0, 0]])

In [72]:
5 //3

1

In [74]:
def my_function(x):
    """
    This function accepts a parameter x and asserts that it is greater than 0.
    """
    assert x > 0, "x must be greater than 0"
    # Rest of the function code here

# Example usage:
my_function(5)  # This will pass the assertion



In [75]:
# This will raise an AssertionError because -1 is not greater than 0
my_function(-1)


AssertionError: x must be greater than 0

# End