Note: This code has nothing to do with the Custom LLM! It's just a space for me to test functions and how they work.

In [1]:
import torch
import numpy as np
import time
device = "mps" if torch.backends.mps.is_available() and torch.backends.mps.is_built() else device

In [2]:
%%time
start_time = time.time()
# matrix operations
zeros = torch.zeros(1, 1)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00014687
CPU times: user 146 μs, sys: 109 μs, total: 255 μs
Wall time: 183 μs


In [3]:
# Complex Task MPS vs CPU time on MacOS
torch_rand1 = torch.rand(10000, 10000).to(device)
torch_rand2 = torch.rand(10000, 10000).to(device)

np_rand1 = torch.rand(10000, 10000)
np_rand2 = torch.rand(10000, 10000)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"MPS time {elapsed_time:.8f}")

start_time = time.time()

rand = (np_rand2 @ np_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"CPU time {elapsed_time:.8f}")

MPS time 0.03118277
CPU time 1.81965399


In [4]:
# Simple Task MPS vs CPU time on MacOS
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)

np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"MPS time {elapsed_time:.8f}")

start_time = time.time()

rand = (np_rand2 @ np_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"CPU time {elapsed_time:.8f}")

MPS time 0.01090598
CPU time 0.08869290


In [5]:
# Define probability tensor
probabilities = torch.tensor([0.1, 0.9])
# Each probability points to the index it is situated in
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
samples

tensor([1, 1, 1, 1, 0, 1, 1, 1, 1, 1])

In [6]:
tensor = torch.tensor([1,2,3,4,])
combined = torch.cat((tensor, torch.tensor([5,])), dim=0)
combined

tensor([1, 2, 3, 4, 5])

In [7]:
out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [8]:
out = torch.triu(torch.ones(5, 5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [9]:
# Masked fill returns second param for all values that return true from first param
out = torch.zeros(5,5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [10]:
# Applies e^(index)
# e^0 = 1, e^-inf = 0
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [11]:
input = torch.zeros(2,3,4)
input.transpose(0,2) # swap 0th dim with 2nd dim
input.shape

torch.Size([2, 3, 4])

In [12]:
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([4,5,6])
tensor3 = torch.tensor([7,8,9])

stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [14]:
import torch.nn as nn
sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([3.9916, 1.7967, 6.8815], grad_fn=<SqueezeBackward4>)


In [15]:
import torch.nn.functional as F

tensor1 = torch.tensor([1., 2., 3.,])

softmax_output = F.softmax(tensor1, dim=0)
softmax_output

tensor([0.0900, 0.2447, 0.6652])

In [18]:
# Embedding vectors
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

input_indices = torch.tensor([1, 3, 5, 2], dtype=torch.long)

embedded_output = embedding(input_indices)

print(embedded_output.shape)

torch.Size([4, 100])


In [19]:
a = torch.tensor([[1, 2], [3, 4], [5, 6]])
b = torch.tensor([[7, 8, 9], [10, 11, 12]])
a@b

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [31]:
int_64 = torch.randint(1, (3, 2)).float()
float_32 = torch.rand(2, 3)
int_64 @ float_32

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [35]:
# Shape reformats but does not create equivalent items (similar to cpp where its pass by value)
a = torch.rand(2, 3, 5)
print(a.shape)
x, y, z = a.shape
a = a.view(x, y, z)
print(a.shape)

torch.Size([2, 3, 5])
torch.Size([2, 3, 5])
