In [2]:
import torch

In [3]:
randint = torch.randint(-100, 100, (6,))
randint

tensor([ 20,  31,  46,  32, -87, -56])

In [4]:
tensor = torch.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]])
tensor

tensor([[0.1000, 1.2000],
        [2.2000, 3.1000],
        [4.9000, 5.2000]])

In [5]:
zeros = torch.zeros(2,3)
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [6]:
ones = torch.ones(3,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [7]:
input = torch.empty(2,3)
input

tensor([[-3.3787e+18,  3.0941e-41, -3.3810e+18],
        [ 3.0941e-41,  0.0000e+00,  0.0000e+00]])

In [8]:
arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

In [9]:
linspace = torch.linspace(3, 10, steps=5)
linspace

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

In [10]:
logspace = torch.logspace(start=-10, end=10, steps=5)
logspace

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

In [11]:
eye = torch.eye(5)
eye

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [12]:
a = torch.empty((2,3), dtype=torch.int64)
empty_like = torch.empty_like(a)
empty_like

tensor([[              0,               0,  94836606367616],
        [ 94836577378496, 140157979713488,               1]])

In [13]:
import time
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [14]:
%%time
start_time = time.time()

zeros = torch.zeros(1,1)

end_time = time.time()

elapsed_time = end_time - start_time
f"{elapsed_time:.4f}"

CPU times: user 255 µs, sys: 94 µs, total: 349 µs
Wall time: 319 µs


'0.0002'

In [15]:
# torch_rand1 = torch.rand(10000, 10000).to(device)
# torch_rand2 = torch.rand(10000, 10000).to(device)

torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"GPU -> {elapsed_time:.8f} seconds")


start_time = time.time()

rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"CPU -> {elapsed_time:.8f} seconds")

GPU -> 0.87399912 seconds
CPU -> 0.15711117 seconds


In [16]:
# Define probability tensor
probability = torch.tensor([0.1, 0.9])
# 10% or 0.1 => 0, 90% or 0.9 => 1. each probability points to the index of the probability in the tensor
# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probability, num_samples=10, replacement=True)
samples

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [17]:
tensor = torch.tensor([1,2,3,4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

In [18]:
out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [19]:
out = torch.triu(torch.ones(5,5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [20]:
out = torch.zeros(5,5).masked_fill(torch.tril(torch.ones(5,5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [21]:
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [22]:
input = torch.zeros(2,3,4)
out = input.transpose(0, 2)
out.shape

torch.Size([4, 3, 2])

In [23]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [24]:
import torch.nn as nn 

sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3,3, bias=False)
print(linear(sample))

tensor([ 1.2544, -2.6928, -8.1884], grad_fn=<SqueezeBackward4>)


In [25]:
import torch.nn.functional as F

# Create a tensor to do the softmax
tensor1 = torch.tensor([1., 2., 3.])

# Apply softmax
softmax_out = F.softmax(tensor1, dim=0)
softmax_out

tensor([0.0900, 0.2447, 0.6652])

In [26]:
# Fiddling with embeddings (powered by ChatGPT)
# Initialize an embedding layer
vocab_size = 80
embedding_dim = 6
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedded_output = embedding(input_indices)

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the dimensionality of the embedding vectors
print(embedded_output.shape)
print(embedded_output)

torch.Size([4, 6])
tensor([[ 1.0955, -1.0229,  0.1675, -0.7774,  1.0356, -1.3426],
        [-0.8129,  0.5437,  0.4722,  2.9030,  1.4619, -0.1648],
        [ 1.4334, -0.0916, -0.7502, -0.3443, -0.6717,  0.5129],
        [ 0.6521,  0.1592,  0.4276,  0.0921,  1.0478, -0.5259]],
       grad_fn=<EmbeddingBackward0>)


In [27]:
# Getting used to matrix multiplications (dot products)
a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])

print(torch.matmul(a, b))
a @ b

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [28]:
int_64 = torch.randint(1, (3, 2)).float()
#type int64
float_32 = torch.rand(2,3)
#type float32
print(int_64.dtype, float_32.dtype)
result = torch.matmul(int_64, float_32)
print(result)

torch.float32 torch.float32
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [30]:
a = torch.rand(2, 3, 5)
print(a.shape)
x, y, z = a.shape
a = a.view(x,y,z)
print(x, y, z)
print(a.shape)

torch.Size([2, 3, 5])
2 3 5
torch.Size([2, 3, 5])


In [33]:
input = torch.rand((4, 8, 10))
B, T, C = input.shape
output = input.view(B*T, C)
# print(output)
# print(input)
print(input[:, -1, :])

tensor([[0.7252, 0.0013, 0.7842, 0.7819, 0.5198, 0.2051, 0.0970, 0.8565, 0.8796,
         0.6702],
        [0.4771, 0.8916, 0.4851, 0.7274, 0.2178, 0.2436, 0.8854, 0.9096, 0.8808,
         0.4003],
        [0.9904, 0.7092, 0.6497, 0.4623, 0.6807, 0.1497, 0.3258, 0.2963, 0.0579,
         0.8644],
        [0.0823, 0.8005, 0.6263, 0.3527, 0.0201, 0.8090, 0.4861, 0.0959, 0.6465,
         0.3683]])


In [40]:
x = torch.tensor([-0.05, 10, 1, -2], dtype=torch.float32)
y = F.relu(x)
y

tensor([ 0., 10.,  1.,  0.])

In [41]:
x = torch.tensor([-0.05, 10, 1, -2], dtype=torch.float32)
y = F.sigmoid(x)
y

tensor([0.4875, 1.0000, 0.7311, 0.1192])

In [42]:
x = torch.tensor([-0.05, 10, 1, -2], dtype=torch.float32)
y = F.tanh(x)
y

tensor([-0.0500,  1.0000,  0.7616, -0.9640])