In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Check if MPS is available
print(torch.backends.mps.is_available())

# Check if MPS can be used
print(torch.backends.mps.is_built())

True
True


In [4]:
# Define the MPS device
device = torch.device("mps")
# 항상 모델 코드를 짜고 난 후에 model = 내가정의한모델().to(device) 이렇게 추가해야 GPU 위에서 돌아가게 할 수 있다.
# Pytorch는 input data와 model이 같은 device 위에 올려져야하기 때문에 inputs, labels = inputs.to(device), labels.to(device) 이렇게도 해줘야한다. 

## Introduction to Tensors

### Creating Tensors

In [6]:
# Scalar is one of tensor
scalar = torch.tensor(7)
scalar

tensor(7)

In [7]:
scalar.ndim # Because scalar has no dimension

0

In [8]:
scalar.item()
# Convert tensor into Python int

7

In [9]:
# Vector
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [10]:
vector.ndim # Why 1 dim? Because we have 1 square bracket. []의 갯수가 차원의 수이다.
# Because it's a 1D Array!

1

In [11]:
vector.shape # Why shape 2? Because 2 elements along single dimension

torch.Size([2])

In [12]:
# MATRIX
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [13]:
MATRIX.shape # 2x2 matrix이기 때문이다.

torch.Size([2, 2])

In [14]:
MATRIX.ndim # Should be 2

2

In [15]:
# TENSOR
TENSOR = torch.tensor([[[1,2,3], 
                        [3,6,9],
                        [2,4,5]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [16]:
TENSOR.ndim

3

In [17]:
TENSOR.shape

torch.Size([1, 3, 3])

In [18]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

In [22]:
print(TENSOR[0][0])
print(TENSOR[0][0][0])

tensor([1, 2, 3])
tensor(1)


## Random Tensors

Why random tensors? Weight initialization is random.

NN starts with tensors full of random numbers and then adjust those random numbers to better represent the data.

"Adjusting numbers"

In [25]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(2, 5, 5)  # 3 rows, 4 columns
random_tensor

tensor([[[0.2356, 0.5711, 0.1637, 0.6672, 0.3183],
         [0.2058, 0.7357, 0.3615, 0.7486, 0.0631],
         [0.0323, 0.0301, 0.7154, 0.1409, 0.6231],
         [0.1667, 0.8137, 0.9611, 0.2009, 0.8688],
         [0.1874, 0.2085, 0.6529, 0.2228, 0.9313]],

        [[0.4538, 0.7867, 0.3547, 0.9298, 0.2351],
         [0.2772, 0.0715, 0.7376, 0.8142, 0.0876],
         [0.9019, 0.0673, 0.4857, 0.1964, 0.6893],
         [0.7400, 0.6828, 0.9329, 0.9987, 0.0818],
         [0.4620, 0.5614, 0.3103, 0.8723, 0.3265]]])

In [26]:
random_tensor.ndim

3

In [28]:
# Create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(3, 256, 256) #행, 열, RGB(레드 그린 블루 패널 3개)
random_image_size_tensor
# popular shape: color x height x width

tensor([[[0.3281, 0.6812, 0.3186,  ..., 0.2321, 0.9316, 0.2629],
         [0.5251, 0.4432, 0.6910,  ..., 0.4855, 0.6138, 0.6891],
         [0.0400, 0.8871, 0.5627,  ..., 0.9201, 0.0858, 0.0136],
         ...,
         [0.3644, 0.7590, 0.2259,  ..., 0.6545, 0.0742, 0.9440],
         [0.7935, 0.5869, 0.3889,  ..., 0.2436, 0.5092, 0.2302],
         [0.5047, 0.9402, 0.3230,  ..., 0.7082, 0.4200, 0.6654]],

        [[0.6802, 0.7121, 0.3442,  ..., 0.7751, 0.7457, 0.1589],
         [0.1523, 0.8453, 0.6861,  ..., 0.3257, 0.7034, 0.6286],
         [0.3011, 0.4610, 0.0237,  ..., 0.1967, 0.1305, 0.8388],
         ...,
         [0.1355, 0.0014, 0.4999,  ..., 0.7620, 0.1214, 0.7550],
         [0.6072, 0.9784, 0.9093,  ..., 0.5892, 0.4425, 0.7693],
         [0.9017, 0.2845, 0.8342,  ..., 0.2741, 0.5360, 0.0689]],

        [[0.8357, 0.1172, 0.4153,  ..., 0.4261, 0.1444, 0.0726],
         [0.6335, 0.3890, 0.8252,  ..., 0.4942, 0.3523, 0.7716],
         [0.3683, 0.2749, 0.5755,  ..., 0.6344, 0.3500, 0.

## Zeros and Ones

Useful when masking

In [44]:
zeros = torch.zeros(3,4)  # What size do I need to wipe out the first column?
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [45]:
random1 = torch.rand(3,4)
random1

tensor([[0.8642, 0.7834, 0.4105, 0.7751],
        [0.2258, 0.6823, 0.6717, 0.1237],
        [0.7000, 0.5160, 0.1975, 0.0268]])

In [47]:
masked_tensor = zeros * random1
masked_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [48]:
ones = torch.ones(3,4)

In [49]:
ones.dtype # Default data type의 줄임말이다.

torch.float32

## Creating a range of tensors and tensors-like

In [52]:
one_to_ten = torch.arange(1, 11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [53]:
# Same as python range indexing
skip_tensor = torch.arange(1, 100, 10)
skip_tensor

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [54]:
# 어떤 텐서가 있을 때, 그 텐서랑 같은 shape를 하도록 만들어 줄 수 있다.
# Creating tensors like

ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor data types

**Note: Tensor datatypes is one of the 3 big errors you'll run into Pytorch:
1. Tensors not right datatype
2. Tensors not the right shape
3. Tensors not on the right device

In [56]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                               dtype=None, 
                               device=device, 
                               requires_grad=False)

float_32_tensor


# These 3 are the most important parameters of a tensor.
# dtype is for datatype, which goes like: float32(pytorch default), float64, float16
# float16 sacrifices detail half than float32 but computes 2 times faster(less memory)
# device = MPS, so we can use GPU as: device = device.
# requires_grad: whether or not to track gradients with this tensors operations


tensor([3., 6., 9.], device='mps:0')

## Getting Tensor Attributes

1. Shape : tensor.dtype
2. datatype : tensor.shape
3. device : tensor.device

In [58]:
some_tensor = torch.rand(3,4)
some_tensor = some_tensor.to(device)  # Move our tensor to device
print(some_tensor.dtype)
print(some_tensor.shape)
print(some_tensor.device)

torch.float32
torch.Size([3, 4])
mps:0


## Tensor Operations

Tensor operations include:

1. Addition
2. Subtraction
3. Multiplication (element-wise) *If you want matrix multiplication, use matmul
4. Division
5. Matrix Multiplication

In [66]:
# Addition
tensor1 = torch.tensor([1, 2, 3])
tensor1 += 10
tensor1

tensor([11, 12, 13])

In [67]:
# Multiplication
tensor1 *= 10
tensor1

tensor([110, 120, 130])

In [68]:
# Or you can try bulit-in function
torch.mul(tensor1, 10)

tensor([1100, 1200, 1300])

In [69]:
# Division
tensor1 //= 10
tensor1

tensor([11, 12, 13])

## Matrix multiplcation

1. Element-wise multiplication (그냥 각 항목을 상수배 해주는)
2. Dot Product (matmul)

In [70]:
torch.matmul(tensor1, tensor1) # Dot product result is a scalar.

tensor(434)

In [72]:
tensor2 = torch.randn(4, 100)
tensor3 = torch.randn(100,2)
result1 = torch.matmul(tensor2, tensor3)
result1

# The results would be 4 x 2. The 100 is like the weights of the neural networks

tensor([[ -1.4824,  17.5870],
        [ 14.4894,   5.6143],
        [  4.6278,  -4.5474],
        [-24.6648,   6.8049]])

In [73]:
# Vector transpose through .T
# The code below will produce an error
tensor4 = torch.randn(3, 100)
torch.matmul(tensor2, tensor4)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x100 and 3x100)

In [74]:
torch.matmul(tensor2, tensor4.T) # If we transpose it and make the shape 100 x 3, matmul!

tensor([[-24.2405,  -3.0962,   2.6595],
        [ -8.4644,  -9.2487,   1.6615],
        [ 13.8404,   9.4026,  -4.3244],
        [  3.5046,  -4.3973,  -6.0458]])

## Tensor Aggregation

### Finding the min, max, mean, sum, etc

In [77]:
# Create a tensor
x = torch.arange(0, 100, 10)
x, x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [79]:
# Note: torch.mean() requires a tensor of float32 datatype to work

print(torch.max(x))
print(x.max)
print(torch.min(x))
print(x.min)
print(torch.mean(x.type(torch.float32))) 
print(x.mean)
print(torch.sum(x))
print(x.sum)

tensor(90)
<built-in method max of Tensor object at 0x14de830e0>
tensor(0)
<built-in method min of Tensor object at 0x14de830e0>
tensor(45.)
<built-in method mean of Tensor object at 0x14de830e0>
tensor(450)
<built-in method sum of Tensor object at 0x14de830e0>


### Finding the positional min and max

In [81]:
# Find the position in tensor that has the minimun value
# Returns index position of taret tensor where the minimum value occurs
x.argmin()

tensor(9)

In [82]:
x.argmax()

tensor(9)

### Reshaping, stacking, squeezing, and unsqueezing tensors

* Reshaping: Reshapes an input tensor to a defined shape

* Stacking: Combine multiple tensors on top / side by side

* Squeeze: Removes all '1' dimensions from a tensor

* Unsqueeze: Add a '1' dimension to a target tensor

* Permute: Return a view of the input with dimensions permuted (swapped) in a certain way

In [86]:
tensor5 = torch.arange(1., 11.)
tensor5, tensor5.shape

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [87]:
# Reshaping tensors. 
# The reshaped result "must conserve the number of elements"
# In this case, we only have 10 (torch.Size[10])

tensor_reshaped = tensor5.reshape(5,2) # 5 x 2 is 10
tensor_reshaped, tensor_reshaped.shape

(tensor([[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]),
 torch.Size([5, 2]))

In [88]:
# Stacking tensors

stacked_tensor = torch.stack([tensor5, tensor5, tensor5, tensor5])
stacked_tensor

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]])

In [92]:
# Squeezing tensors
# Note: Squeezing removes "1" dimension

squeezed_tensor = tensor_reshaped.squeeze()
squeezed_tensor, squeezed_tensor.shape

# This does not remove anything

(tensor([[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]),
 torch.Size([5, 2]))

In [95]:
# But this does!

tensor6 = torch.rand(1,9)
squeezed_tensor2 = tensor6.squeeze()
tensor6.shape, squeezed_tensor2, squeezed_tensor2.shape

(torch.Size([1, 9]),
 tensor([0.7492, 0.2213, 0.7740, 0.1490, 0.3385, 0.8517, 0.6635, 0.2477, 0.8125]),
 torch.Size([9]))

In [97]:
# Unsqueeze, adds a 1 dimension at the position of parameter "dim="
unsqueezed = squeezed_tensor2.unsqueeze(dim=1)
unsqueezed, unsqueezed.shape

(tensor([[0.7492],
         [0.2213],
         [0.7740],
         [0.1490],
         [0.3385],
         [0.8517],
         [0.6635],
         [0.2477],
         [0.8125]]),
 torch.Size([9, 1]))

In [98]:
# Permute
# Same as permutation matrix (swapping rows)

tensor7 = torch.rand(256, 256, 3)
permuted = tensor7.permute(2, 1, 0) #2번째 차원(3)이 맨 앞으로, 1번째 차원(256)이 그 다음, 0번째 차원이 다음)
permuted.shape

torch.Size([3, 256, 256])

## Tensor Indexing

In [111]:
tensor8 = torch.tensor([[[1,2,3],
                       [4,5,6],
                       [7,8,9]]])
tensor8

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [113]:
# How to ouput 9?
# : stands for "select all values of that dimension"

tensor8[0][2][2] 

tensor(9)

In [118]:
# How to output 3, 6, 9?
tensor8[:, :, 2]

tensor([[3, 6, 9]])

### Note: 

#### The expressions `tensor[:][:][2]` and `tensor[:,:,2]` differ because of how indexing works in multi-dimensional tensors (or arrays).

### 1. `tensor[:][:][2]`
- **Step-by-step breakdown**:
  - `tensor[:]` creates a view or shallow copy of the tensor.
  - `tensor[:][:]` applies another slice operation to the result of `tensor[:]`, which is effectively the same as the first slice. However, it's less clear and could potentially change the structure depending on the implementation.
  - `tensor[:][:][2]` then takes the third element (index 2) of this sliced structure, which might not be the intended operation on the original tensor.

### 2. `tensor[:,:,2]`
- **Explanation**:
  - This syntax directly indexes the third dimension (or channel) across all rows and columns of the tensor.
  - `:` indicates selecting all elements along that dimension, while `2` selects the third element of the last dimension.

### Why They Are Different
- `tensor[:][:][2]` does not directly select the third "slice" in the third dimension of a tensor. Instead, it first modifies the tensor’s shape with each slicing, eventually resulting in a 1D array (or a vector). Then it selects the third element of that 1D array.
- `tensor[:,:,2]`, however, is a direct slicing of the third dimension and is the correct way to access that particular slice in a multi-dimensional tensor.

In short, `tensor[:,:,2]` is the correct and direct way to slice the third element along the third dimension, while `tensor[:][:][2]` ends up being a chain of operations that can yield unexpected results.

## Pytorch tensors & Numpy

* Data in Numpy -> Pytorch tensor : torch.from_numpy(ndarray)
* Pytorch tensor -> NumPy -> torch.Tensor.numpy()

In [119]:
import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor9 = torch.from_numpy(array)
array, tensor9

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [123]:
# When converting numpy -> tensor, default numpy dtype is floa64, while pytorch is float32
tensor10 = tensor9.type(torch.float32)
tensor10.dtype

torch.float32

## Reproducibility of tensors (Random seeds)

In [124]:
# Controled randomness. It works only one block of code at a time.

random_seed = 42
torch.manual_seed(random_seed)
rand_tensor1 = torch.rand(3,4)

torch.manual_seed(random_seed)
rand_tensor2 = torch.rand(3,4)

print(rand_tensor1 == rand_tensor2)

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Puting tensors (and models) on the GPU

For faster computations

In [125]:
tensor11 = torch.tensor([1,2,3])
print(tensor11, tensor11.device)
# Currently on cpu

tensor([1, 2, 3]) cpu


In [126]:
# Let's move this tensor to GPU
tensor11_gpu = tensor11.to(device)
tensor11_gpu

tensor([1, 2, 3], device='mps:0')