## Pytorch Basic Fundamentals

In [1]:
import torch

randint = torch.randint(-100, 100, (5,))
randint

tensor([  8,  63, -54,  -4,  30])

In [None]:
tensor = torch.rand(3, 2)
tensor

tensor([[0.8225, 0.9045],
        [0.2724, 0.7028],
        [0.6283, 0.7486]])

In [5]:
zeros = torch.zeros(3,2)
zeros

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [6]:
ones = torch.ones(4,2)
ones

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [7]:
input = torch.empty(2, 3)
input

tensor([[1.0801e-05, 2.0773e+20, 1.9971e+20],
        [1.0017e-11, 2.1156e+23, 2.1764e-04]])

In [8]:
arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

In [None]:
# constant 5 increment from 3 to 10
line_space = torch.linspace(3, 10, steps=5)
line_space

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

In [12]:
log_space = torch.logspace(start=-10, end=10, steps=5)
log_space

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

In [None]:
# Diagonal Line
eye = torch.eye(3)
eye

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [None]:
a = torch.empty((2,3), dtype=torch.int32)
empty_list = torch.empty_like(a)
empty_list


tensor([[         0,          0, 1350192304],
        [     24515, 1861753168,      31912]], dtype=torch.int32)

### Diving more into Fundamentals

In [3]:
import torch 

prob = torch.tensor([0.1, 0.9])
# 10% or 0.1-> index 0
# 90% or 0.9-> index 1
# each probability points to the index of the probaility in the tensor
samples = torch.multinomial(prob, num_samples=10, replacement=True)
print(samples)

tensor([1, 0, 1, 1, 1, 1, 1, 0, 1, 1])


In [None]:
tensor1 = torch.arange(1, 5)
tensor2 = torch.arange(5, 9)
print(tensor1, tensor2, sep="\n")

# concat the new predictions with predicted old tensors(characters)
out = torch.cat((tensor1, tensor2))
print(out, out.dim(), sep="\n")

tensor([1, 2, 3, 4])
tensor([5, 6, 7, 8])
tensor([1, 2, 3, 4, 5, 6, 7, 8])
1


In [None]:
# Autoregressive -> predict new tokens based on prev tokens
# Triangle Lower
out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [None]:
# Triangle Upper
out = torch.triu(torch.ones(5, 5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [None]:
# exponent of -inf = 0
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5))==0, float("-inf"))
out

tensor([[0.0000, 3.1416, 3.1416, 3.1416, 3.1416],
        [0.0000, 0.0000, 3.1416, 3.1416, 3.1416],
        [0.0000, 0.0000, 0.0000, 3.1416, 3.1416],
        [0.0000, 0.0000, 0.0000, 0.0000, 3.1416],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])

In [32]:
input = torch.rand(2, 3, 4)

# swap index positions
out = input.transpose(0, 2)
print(input.shape)
print(out.shape)

torch.Size([2, 3, 4])
torch.Size([4, 3, 2])


In [34]:
tensor1 = torch.tensor([0, 1, 2])
tensor2 = torch.tensor([3, 4, 5])
tensor3 = torch.tensor([6, 7, 8])

# Stack tensor along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

### Neural-Net Layers

In [1]:
import torch
import torch.nn as nn

sample = torch.tensor([3., 4., 5.])

# Linear NN Module
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([-1.1886,  2.3584, -2.1891], grad_fn=<SqueezeBackward4>)


In [2]:
import torch.nn.functional as F

tensor1 = torch.tensor([3., 2., 1.])

# Applying softmax assigning probs to each tensors such that their probs sum = 1.00
softmax_output = F.softmax(tensor1, dim=0)
softmax_output 

tensor([0.6652, 0.2447, 0.0900])

In [5]:
voacb_size = 1000
embedding_dim = 5

embedding = nn.Embedding(voacb_size, embedding_dim)

# Create input
input_indices = torch.LongTensor([1, 3, 6, 4])

# Apply embedding
embed_out = embedding(input_indices)

print(embed_out)
print(embed_out.shape)

tensor([[ 0.5647,  0.5163,  0.5341, -1.4410, -1.3746],
        [-0.1377,  0.1581, -0.1656,  0.7313,  0.2542],
        [ 0.2803, -0.8372, -0.5901,  0.5287, -0.5120],
        [ 0.7243, -0.2106,  0.4439,  0.3862, -1.2321]],
       grad_fn=<EmbeddingBackward0>)
torch.Size([4, 5])


### Matrix Multiplication

In [4]:
import torch

a = torch.tensor([[1,2], [3,4], [5,6]])
b = torch.tensor([[7, 8, 9], [10, 11, 12]])

print(a.shape, b.shape)

print(torch.matmul(a, b))
print(a@b)

torch.Size([3, 2]) torch.Size([2, 3])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [13]:
int_64 = torch.randint(1, (3, 2))
float_32 = torch.rand(2, 3)

print(int_64, float_32, sep="\n")

result = int_64 @ float_32
print(result)

tensor([[0, 0],
        [0, 0],
        [0, 0]])
tensor([[0.7889, 0.8850, 0.7340],
        [0.7800, 0.1345, 0.3495]])


RuntimeError: expected m1 and m2 to have the same dtype, but got: long int != float

In [17]:
int_64 = torch.randint(1, (3, 2)).float()       # Cast int -> float
float_32 = torch.rand(2, 3)

print(int_64, float_32, sep="\n")

result = int_64 @ float_32
print(result)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.1405, 0.0167, 0.8322],
        [0.0125, 0.3191, 0.0737]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


### Re-Shaping

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
input = torch.rand((2, 3, 4))
print(input)

B, T, C = input.shape
output = input.view(B*T, C)
print(output)
print(output.shape)

tensor([[[0.8308, 0.8217, 0.0874, 0.1216],
         [0.3824, 0.8602, 0.8522, 0.5329],
         [0.9385, 0.2326, 0.6216, 0.3302]],

        [[0.9652, 0.1530, 0.2825, 0.5334],
         [0.1360, 0.7825, 0.0095, 0.9409],
         [0.9173, 0.8536, 0.7415, 0.8755]]])
tensor([[0.8308, 0.8217, 0.0874, 0.1216],
        [0.3824, 0.8602, 0.8522, 0.5329],
        [0.9385, 0.2326, 0.6216, 0.3302],
        [0.9652, 0.1530, 0.2825, 0.5334],
        [0.1360, 0.7825, 0.0095, 0.9409],
        [0.9173, 0.8536, 0.7415, 0.8755]])
torch.Size([6, 4])


### Activation Function

#### 1. ReLU
$$ReLU(x) = \max(0, x)$$

#### 2. Sigmoid
$$\sigma(x) = \frac{1}{1 + e^{-x}}$$

In [18]:
x = torch.tensor([-0.5], dtype=torch.float32)
relu = nn.ReLU(x)
print(relu)

sigmoid = F.sigmoid(x)
print(1/(torch.exp(-x) + 1))
print(sigmoid)

ReLU(inplace=True)
tensor([0.3775])
tensor([0.3775])
