In [1]:
import torch
import numpy as np
import time
from torch.nn import functional as F
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
print(device)

mps


In [9]:
%%time
# method of getting time diff
start_time = time.time()
# matrix operations here
zeros = torch.zeros(1,1)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00066495
CPU times: user 1.18 ms, sys: 567 µs, total: 1.75 ms
Wall time: 1.33 ms


In [8]:
# measures difference between GPU & CPU
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

# Using MPS(GPU)
start_time = time.time()

rand = (torch_rand1 @ torch_rand2)    # @ -> do matrix multiplication

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")


# Using CPU
start_time = time.time()

rand = np.multiply(np_rand1, np_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.01035380
0.09004688


In [17]:
# (Show all examples of functions/methods with pytorch docs)
# torch.stack, torch.multinomial, torch.tril, torch.triu, input.T/input.transpose, nn.Linear, torch.cat, F.softmax

In [14]:
# Define probability tensor
probabilities = torch.tensor([0.1, 0.9])
# 10% or 0.1 -> 0, 90% or 0.9 -> 1. Each probability points to the index of probability in the tensor
# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 0, 1])


In [24]:
# Concatenate two tensors into one
tensor = torch.tensor([1,2,3,4])
output = torch.cat((tensor, torch.tensor([5])), dim=0)
output

tensor([1, 2, 3, 4, 5])

In [30]:
# Lower triangular
output = torch.tril(torch.ones(5,5))
output

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [31]:
# Upper triangular
output = torch.triu(torch.ones(5,5))
output

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [35]:
# masked_fill
# Exponential to get lower/upper triangular -> eg. e的0次方 = 1, e的負無限次方 = 0
output_for_e = torch.zeros(5,5).masked_fill(torch.tril(torch.ones(5,5)) == 0, float('-inf'))
output_for_e

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [36]:
torch.exp(output_for_e)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [40]:
input = torch.zeros(2,3,4)    # 2個矩陣，3 rows，4 columns
out = input.transpose(0,2)    # -> (4,3,2), 0 index change with 2 index
out.shape

torch.Size([4, 3, 2])

In [41]:
# torch.stack
# We use this to stack multiple vertors into one matrix (block)
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([4,5,6])
tensor3 = torch.tensor([7,8,9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [44]:
# nn.Linear function
import torch.nn as nn
sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3, 3, bias=False)    # bias->weight
print(linear(sample))

tensor([-5.8418, 11.1823, -4.0782], grad_fn=<SqueezeBackward4>)


In [52]:
# softmax function

# if we had [1,2,3], get the exponential of those(e^1,e^2,e^3) and add them up to have a sum(29.5).
# divide each exponentiated elements by the sum to get new [x,y,z]
import torch.nn.functional as F

# Create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Apply softmax using torch.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [51]:
# Embedding vectors
import torch
import torch.nn as nn

# Suppose you have a vocabulary size of 10000 and want to represent words with 100-dimensional embeddings
num_embeddings=10000
embedding_dim=100
embedding = nn.Embedding(num_embeddings, embedding_dim)

# Input tensor with token indices
input_indices = torch.tensor([1, 5, 3, 7, 2])

# Forward pass through the embedding layer
embedded_vectors = embedding(input_indices)

print(embedded_vectors.shape)

torch.Size([5, 100])


In [56]:
a = torch.tensor([[1,2], [3,4], [5,6]])
b = torch.tensor([[7,8,9], [10,11,12]])
print(torch.matmul(a,b)) # equals print(a @ b)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [63]:
# pytorch does not support integer @ float
# int_64 = torch.randint(1,(3,2)) will give int -> dtype = int64

int_64 = torch.randint(1,(3,2)).float()
# type float32
float_32 = torch.rand(2,3)
# type float32

# print(int_64.dtype, float_32.dtype)
result = int_64 @ float_32
print(result)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [65]:
a = torch.rand(2 ,3, 5)
x, y, z = a.shape
a = a.view(x,y,z)
print(x,y,z)
print(a.shape)

2 3 5
torch.Size([2, 3, 5])


In [4]:
# torch.nn.functional.sigmoid -> Sigmoid(x)=σ(x)= 1/(1+exp(−x))
x = torch.tensor([-0.05], dtype=torch.float32)
y = F.sigmoid(x)
print(y)

tensor([0.4875])


In [5]:
# torch.nn.functional.tanh
x = torch.tensor([1], dtype=torch.float32)
y = F.tanh(x)
print(y)

tensor([0.7616])
