In [10]:
import torch

# Check Device

In [17]:
print(f"NVIDIA Available: {torch.cuda.is_available()}")
print(f"MPS Available: {torch.backends.mps.is_available()}")

NVIDIA Available: False
MPS Available: True


# Tensors

In [37]:
tensor_0d = torch.tensor(1)
tensor_1d = torch.tensor([1,2,3])
tensor_2d = torch.tensor([[1,2,3],[4,5,6]])
tensor_3d = torch.tensor([[[1,2,3],[4,5,6]],[[1,2,3],[4,5,6]]])

### Data Type (dtype)

In [38]:
print(f"shape of 3d tensor: {tensor_3d.shape}")
print(f"type of 3d tensor: {tensor_3d.dtype}")

shape of 3d tensor: torch.Size([2, 2, 3])
type of 3d tensor: torch.int64


In [39]:
# change the type of a tensor
tensor_0d = tensor_0d.to(torch.float32)
print(tensor_0d.dtype)

torch.float32


### Tensor Operations

In [41]:
print(tensor_2d)
print(tensor_2d.shape)

tensor([[1, 2, 3],
        [4, 5, 6]])
torch.Size([2, 3])


In [42]:
# reshape the matrix
tensor_2d.reshape(3,2)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [44]:
# reshape the matrix
tensor_2d.view(3,2)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [46]:
# transpose the matrix
tensor_2d.T

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [47]:
tensor_2d.matmul(tensor_2d.T)

tensor([[14, 32],
        [32, 77]])

In [48]:
tensor_2d @ tensor_2d.T

tensor([[14, 32],
        [32, 77]])

### Computation Graphs

In [55]:
import torch.nn.functional as F

y = torch.tensor([1.0]) # true label
x1 = torch.tensor([1.1]) # input feature
w1 = torch.tensor([2.2]) # weight parameter
b = torch.tensor([0.0]) # bias unit

z = x1 * w1 + b # net input
a = torch.sigmoid(z) # output

loss = F.binary_cross_entropy(a,y)

print(loss)

tensor(0.0852)


#### Autograd

In [62]:
w1.requires_grad , b.requires_grad

(False, False)

In [71]:
y = torch.tensor([1.0]) # true label
x1 = torch.tensor([1.1]) # input feature
w1 = torch.tensor([2.2], requires_grad=True) # weight parameter
b = torch.tensor([0.0], requires_grad=True) # bias unit

z = x1 * w1 + b # net input
a = torch.sigmoid(z) # output

loss = F.binary_cross_entropy(a,y)

print(loss)

tensor(0.0852, grad_fn=<BinaryCrossEntropyBackward0>)


In [77]:
from torch.autograd import grad

# By default PyTorch frees the computation graph once gradients have been
# computed. The graph is reused right below (second grad() call, and later
# by loss.backward()), so retain_graph=True keeps it in memory.
grad_loss_w1 = grad(loss,w1,retain_graph=True)
grad_loss_b = grad(loss,b,retain_graph=True)

# grad() returns a tuple holding one gradient tensor per requested input
print(grad_loss_w1)
print(grad_loss_b)

(tensor([-0.0898]),)
(tensor([-0.0817]),)


In [78]:
#automate the above process
loss.backward()

print(w1.grad)
print(b.grad)

tensor([-0.0898])
tensor([-0.0817])


# Multi-Layer Neural Network

In [81]:
import torch.nn

class NeuralNetwork(torch.nn.Module):
    """Fully connected network: a Linear + ReLU pair per hidden layer, then a Linear output layer.

    Args:
        in_dim: number of input features per example.
        out_dim: number of output units (one logit per class).
        hidden_dims: widths of the hidden layers, in order. Defaults to
            ``(30, 20)``, the two widths that used to be hard-coded, so
            ``NeuralNetwork(in_dim, out_dim)`` still builds the same
            architecture with the same ``state_dict`` keys
            (``layers.0``, ``layers.2``, ``layers.4``).
    """

    def __init__(self, in_dim, out_dim, hidden_dims=(30, 20)):
        super().__init__()
        modules = []
        prev_dim = in_dim
        # hidden layers: each Linear is followed by a ReLU non-linearity
        for width in hidden_dims:
            modules.append(torch.nn.Linear(prev_dim, width))
            modules.append(torch.nn.ReLU())
            prev_dim = width
        # output layer: no activation, so forward() yields raw logits
        modules.append(torch.nn.Linear(prev_dim, out_dim))
        self.layers = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw logits (no softmax applied)."""
        logits = self.layers(x)
        return logits

In [83]:
# torch.manual_seed(123)
model = NeuralNetwork(50,3)
model

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)

In [91]:
# count the trainable parameters (those with requires_grad=True)
number_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print(number_params)

2213


In [98]:
print(model.layers[0].bias)
print(model.layers[0].weight)

Parameter containing:
tensor([ 0.0577, -0.0875,  0.0094, -0.1068,  0.1104,  0.0912, -0.1130,  0.0303,
         0.1071, -0.0189, -0.1048,  0.0422, -0.0065, -0.0019, -0.0817,  0.0769,
        -0.0420,  0.0252, -0.0319, -0.0123,  0.0887, -0.0871,  0.1399, -0.0468,
        -0.0180,  0.0284, -0.0508, -0.0411, -0.0350, -0.0682],
       requires_grad=True)
Parameter containing:
tensor([[-0.1133,  0.1053,  0.0403,  ...,  0.1004, -0.1149,  0.0923],
        [-0.0896, -0.0357,  0.0218,  ...,  0.0985,  0.0785, -0.0420],
        [-0.1369,  0.0134, -0.0823,  ..., -0.0825, -0.0592, -0.0243],
        ...,
        [-0.0608, -0.0583,  0.1045,  ..., -0.0257, -0.0957, -0.1372],
        [ 0.1338,  0.0606, -0.0176,  ..., -0.1136, -0.0317, -0.0992],
        [-0.0429,  0.1365,  0.0176,  ..., -0.0850,  0.0983, -0.1007]],
       requires_grad=True)


In [101]:
torch.manual_seed(123)
X = torch.rand((1,50))
out = model(X)
print(out)

tensor([[-0.0131, -0.1209, -0.0638]], grad_fn=<AddmmBackward0>)


In [102]:
with torch.no_grad():
    output = torch.softmax(model(X),dim=1)
print(output)

tensor([[0.3511, 0.3152, 0.3337]])


# Data Loaders

In [110]:
# create random train/test dataset
X_train = torch.tensor(
    [
        [-1.2, 3.1],
        [-0.9, 2.9],
        [-0.5, 2.6],
        [2.3, -1.1],
        [2.7, -1.5]
    ]
)

y_train = torch.tensor(
    [0,0,0,1,1]
)

In [111]:
X_test = torch.tensor(
    [
        [-0.8, 2.8],
        [2.6, -1.6],
    ]
)

y_test = torch.tensor(
    [0,1]
)

#### Dataset

In [113]:
from torch.utils.data import Dataset
# the main components of a custom Dataset class are __init__, __getitem__ and __len__
class ToyDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label."""

    def __init__(self,X,y):
        # Keep references to the inputs as given; nothing is copied or validated.
        self.labels = y
        self.features = X

    def __len__(self):
        """Number of examples, read from the first dimension of the labels."""
        return self.labels.shape[0]

    def __getitem__(self,index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

In [114]:
train_dataset = ToyDataset(X_train,y_train)
test_dataset = ToyDataset(X_test,y_test)

In [120]:
len(train_dataset) # get len
train_dataset[-1] # get idx

(tensor([ 2.7000, -1.5000]), tensor(1))

#### DataLoader

In [123]:
from torch.utils.data import DataLoader
DataLoader?

[31mInit signature:[39m
DataLoader(
    dataset: torch.utils.data.dataset.Dataset[+_T_co],
    batch_size: Optional[int] = [32m1[39m,
    shuffle: Optional[bool] = [38;5;28;01mNone[39;00m,
    sampler: Union[torch.utils.data.sampler.Sampler, collections.abc.Iterable, NoneType] = [38;5;28;01mNone[39;00m,
    batch_sampler: Union[torch.utils.data.sampler.Sampler[list], collections.abc.Iterable[list], NoneType] = [38;5;28;01mNone[39;00m,
    num_workers: int = [32m0[39m,
    collate_fn: Optional[Callable[[list[~_T]], Any]] = [38;5;28;01mNone[39;00m,
    pin_memory: bool = [38;5;28;01mFalse[39;00m,
    drop_last: bool = [38;5;28;01mFalse[39;00m,
    timeout: float = [32m0[39m,
    worker_init_fn: Optional[Callable[[int], NoneType]] = [38;5;28;01mNone[39;00m,
    multiprocessing_context=[38;5;28;01mNone[39;00m,
    generator=[38;5;28;01mNone[39;00m,
    *,
    prefetch_factor: Optional[int] = [38;5;28;01mNone[39;00m,
    persistent_workers: bool = [38;5;28;01mFa

In [None]:
train_loader= DataLoader(
    dataset= train_dataset,
    batch_size = 2,
    shuffle = True,
    num_workers= 0 # If multiple workers are enabled, the data loader can already queue up the next batch in the background
)

In [129]:
test_loader= DataLoader(
    dataset= test_dataset,
    batch_size = 2,
    shuffle = False,
    num_workers= 0
)

In [142]:
for i, (x,y) in enumerate(train_loader):
    print(f"Batch {i+1}: {x,y}")

Batch 1: (tensor([[-1.2000,  3.1000],
        [-0.9000,  2.9000]]), tensor([0, 0]))
Batch 2: (tensor([[ 2.3000, -1.1000],
        [ 2.7000, -1.5000]]), tensor([1, 1]))


In [141]:
# drop the last batch 
train_loader= DataLoader(
    dataset= train_dataset,
    batch_size= 2,
    shuffle= True,
    num_workers= 0,
    drop_last= True
)
for i,(x,y) in enumerate(train_loader):
    print(f"Batch{i+1}: {x,y}")

Batch1: (tensor([[-1.2000,  3.1000],
        [-0.9000,  2.9000]]), tensor([0, 0]))
Batch2: (tensor([[ 2.3000, -1.1000],
        [ 2.7000, -1.5000]]), tensor([1, 1]))


In [143]:
len(train_loader)

2

# Typical Training Loop

#### Train the model

In [150]:
import torch.nn.functional as F

# Train a fresh 2-input / 2-output network on the toy data with plain SGD.
model = NeuralNetwork(2,2)
optimizer = torch.optim.SGD(model.parameters(),lr=0.5)
num_epochs= 3

for e in range(num_epochs):
    model.train() # training mode; only matters if the architecture has dropout or normalization layers
    for i,(x,y) in enumerate(train_loader):
        logits= model(x)
        print(logits.shape,y.shape) # debug output: shapes of the logits and of the label batch
        loss = F.cross_entropy(logits,y)

        optimizer.zero_grad() # reset the gradients to zero, otherwise they accumulate across batches
        loss.backward() # compute the gradients of the loss w.r.t. the parameters
        optimizer.step() # update the parameters using those gradients

        # note: the epoch counter is printed 1-based, the batch counter 0-based
        print(f"Epoch {e+1}/{num_epochs} | Batch {i}/{len(train_loader)} | Train Loss {loss:.2f} ")
    model.eval() # evaluation mode: deactivates dropout and batch-norm updates if present
    # Optional model evaluation
    # Optional model evaluation

torch.Size([2, 2]) torch.Size([2])
Epoch 1/3 | Batch 0/2 | Train Loss 0.46 
torch.Size([2, 2]) torch.Size([2])
Epoch 1/3 | Batch 1/2 | Train Loss 1.14 
torch.Size([2, 2]) torch.Size([2])
Epoch 2/3 | Batch 0/2 | Train Loss 0.23 
torch.Size([2, 2]) torch.Size([2])
Epoch 2/3 | Batch 1/2 | Train Loss 0.07 
torch.Size([2, 2]) torch.Size([2])
Epoch 3/3 | Batch 0/2 | Train Loss 0.07 
torch.Size([2, 2]) torch.Size([2])
Epoch 3/3 | Batch 1/2 | Train Loss 0.02 


#### Evaluation

In [158]:
# inference 
model.eval()
with torch.no_grad():
    outputs = model(X_test)
torch.softmax(outputs,dim=1)

tensor([[0.9784, 0.0216],
        [0.0048, 0.9952]])

In [164]:
torch.set_printoptions(sci_mode=False) # used here to make the outputs more legible.
y_predict = torch.argmax(torch.softmax(outputs,dim=1),dim=1)
y_predict

tensor([0, 1])

In [166]:
torch.sum(y_predict == y_test)

tensor(2)

#### Calculate the accuracy

In [167]:
def compute_accuracy(model,dataloader):
    """Return the fraction of examples in ``dataloader`` that ``model`` classifies correctly.

    Args:
        model: a ``torch.nn.Module`` that maps a feature batch to per-class
            scores; the predicted class is the argmax over dimension 1.
            The model is switched to eval mode and left in that mode.
        dataloader: iterable yielding ``(features, labels)`` batches, where
            ``labels`` holds the true class index of each example.

    Returns:
        The accuracy as a Python float, or ``nan`` when the dataloader
        yields no examples (instead of failing with a division by zero).
    """
    model.eval()  # once is enough; no need to repeat it for every batch

    # Batches are moved to wherever the model's parameters live, so the
    # function also works after ``model.to(<accelerator>)``.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct_labels = 0
    total_labels = 0
    with torch.no_grad():
        for (x,y) in dataloader:
            if device is not None:
                x, y = x.to(device), y.to(device)
            predicted_labels = torch.argmax(model(x), dim=1)
            # plain int counters avoid the float -> tensor type switch of the accumulator
            correct_labels += int(torch.sum(predicted_labels == y))
            total_labels += len(predicted_labels)

    if total_labels == 0:
        return float("nan")
    return correct_labels / total_labels

In [168]:
compute_accuracy(model,test_loader)

1.0

# Save and Load

In [169]:
model.state_dict()

OrderedDict([('layers.0.weight',
              tensor([[ 0.7017, -0.1831],
                      [-0.2414,  0.1945],
                      [ 0.2748, -0.3816],
                      [ 0.3879,  0.1403],
                      [ 0.5233, -0.3944],
                      [ 0.6558, -0.2588],
                      [ 0.6765,  0.4306],
                      [ 0.4291, -0.1179],
                      [-0.1032, -0.4993],
                      [-0.1749,  0.3715],
                      [-0.2227, -0.4528],
                      [ 0.3145,  0.3721],
                      [-0.4028,  0.3713],
                      [ 0.4374,  0.1186],
                      [-0.5328, -0.3546],
                      [-0.3286, -0.3685],
                      [-0.3631, -0.0938],
                      [-0.1297, -0.3198],
                      [ 0.6690,  0.0767],
                      [ 0.6646, -0.1820],
                      [-0.5471, -0.6472],
                      [ 0.3296, -0.4531],
                      [ 0.2784, -0.6826],
 

In [170]:
# save the model in ./model.pth
torch.save(model.state_dict(),"model.pth")  

In [173]:
model_=NeuralNetwork(2,2)
model_.load_state_dict(torch.load("model.pth",weights_only=True))

<All keys matched successfully>

# Train on GPU

In [176]:
torch.backends.mps.is_available()
#torch.cuda.is_available()

True

In [179]:
tensor_1= torch.tensor([1,2,3])
tensor_2= torch.tensor([4,5,6])

In [180]:
# element-wise sum of the two CPU tensors (was `tenso_2`, an undefined name)
print(tensor_1 + tensor_2)

tensor([5, 7, 9])


In [182]:
tensor_1.device , tensor_2.device

(device(type='cpu'), device(type='cpu'))

In [184]:
tensor_1_gpu=tensor_1.to("mps")
tensor_2_gpu=tensor_2.to("mps")

In [186]:
# NOTE(review): this adds tensor_1_gpu to itself; tensor_2_gpu is created above
# but never used — confirm whether `tensor_1_gpu + tensor_2_gpu` was intended.
tensor_1_gpu + tensor_1_gpu

tensor([2, 4, 6], device='mps:0')

In [187]:
# Use the best available device instead of hard-coding "mps", so this cell
# also runs on CUDA machines and on CPU-only machines. On a machine with MPS
# the behaviour is the same as before.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# fresh model; its parameters must live on the same device as the batches
model= NeuralNetwork(2,2)
model= model.to(device)

optimizer= torch.optim.SGD(model.parameters(),lr=0.5)

epochs=3

for epoch in range(epochs):
    model.train()
    for i,(x,y) in enumerate(train_loader):
        # move each batch to the model's device before the forward pass
        x,y= x.to(device), y.to(device)
        logits= model(x)
        loss= F.cross_entropy(logits,y)

        optimizer.zero_grad()  # clear gradients left over from the previous batch
        loss.backward()        # compute gradients of the loss w.r.t. the parameters
        optimizer.step()       # apply the SGD update
        print(f"Epoch {epoch+1}/{epochs} | Batch {i}/{len(train_loader)} | Train Loss {loss:.2f} ")

        

Epoch 1/3 | Batch 0/2 | Train Loss 0.75 
Epoch 1/3 | Batch 1/2 | Train Loss 0.24 
Epoch 2/3 | Batch 0/2 | Train Loss 0.05 
Epoch 2/3 | Batch 1/2 | Train Loss 0.05 
Epoch 3/3 | Batch 0/2 | Train Loss 0.01 
Epoch 3/3 | Batch 1/2 | Train Loss 0.01 


In [195]:
model.layers[0].weight

Parameter containing:
tensor([[-0.3919,  0.6465],
        [ 0.2414, -0.3386],
        [ 0.7003, -0.0879],
        [ 0.2894, -0.6232],
        [-0.1410,  0.5893],
        [ 0.3068,  0.3856],
        [-0.4816, -0.0227],
        [ 0.6579, -0.1860],
        [ 0.0480,  0.0899],
        [ 0.5136,  0.6083],
        [ 0.0649,  0.4637],
        [-0.2341, -0.5823],
        [ 0.4045, -0.3357],
        [ 0.5693, -0.0379],
        [-0.6205, -0.1499],
        [-0.5333, -0.4889],
        [-0.2865,  0.1733],
        [-0.2331,  0.5681],
        [ 0.0653,  0.6471],
        [ 0.5679, -0.6270],
        [ 0.2700, -0.3539],
        [ 0.6353, -0.6642],
        [-0.2211,  0.4684],
        [-0.4319, -0.4663],
        [ 0.0288,  0.6920],
        [ 0.6842, -0.3701],
        [ 0.6187,  0.0133],
        [ 0.6674, -0.5207],
        [-0.4751, -0.0205],
        [-0.5889,  0.0441]], device='mps:0', requires_grad=True)