# What's PyTorch

In [1]:
from torch.autograd import grad
from torch.utils.data import Dataset,DataLoader
import torch,numpy as np,torch.nn.functional as F
# Display the installed PyTorch version so the recorded outputs can be tied to an environment.
torch.__version__

'2.9.1'

In [2]:
# Check that the MPS backend is usable; the GPU cells later in this notebook move tensors to 'mps'.
torch.mps.is_available()

True

# Understanding Tensors
## Scalars, Vectors, Matrices, Tensors

In [3]:
# Rank-0 through rank-3 tensors built directly from Python scalars and nested lists.
tensor0d = torch.tensor(1)
tensor1d = torch.tensor([1, 2, 3])
tensor2d = torch.tensor([[1, 2], [3, 4]])
tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

# The same 3D values as a NumPy array, converted to a tensor in two different ways.
ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
tensor3d_2 = torch.tensor(ary3d)      # copies the array's data
tensor3d_3 = torch.from_numpy(ary3d)  # shares memory with the array

# Mutate the array afterwards: the copy keeps 1, the memory-sharing tensor sees 999.
ary3d[0, 0, 0] = 999
tensor3d_2

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [4]:
# Created with from_numpy, so the in-place edit made to ary3d is visible through this tensor.
tensor3d_3

tensor([[[999,   2],
         [  3,   4]],

        [[  5,   6],
         [  7,   8]]])

## Tensor Data Types

In [5]:
# A tensor built from Python integers gets a 64-bit integer dtype.
tensor1d.dtype

torch.int64

In [6]:
# Cast the integer vector to 32-bit floating point and confirm the resulting dtype.
floatvec = tensor1d.to(dtype=torch.float32)
floatvec.dtype

torch.float32

## Common PyTorch Tensor Ops

In [7]:
# Rebind tensor2d to a 2x3 matrix (it was 2x2 earlier) for the shape/reshape/transpose examples.
tensor2d = torch.tensor([[1, 2, 3], [4, 5, 6]])
tensor2d

tensor([[1, 2, 3],
        [4, 5, 6]])

In [8]:
# Two rows, three columns.
tensor2d.shape

torch.Size([2, 3])

In [9]:
# Lay the same six elements out as 3 rows x 2 columns, keeping their row-major order.
tensor2d.reshape(3,2)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [10]:
# Transpose: rows become columns (unlike reshape, this changes which elements sit next to each other).
tensor2d.T

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [11]:
# Matrix product of the 2x3 matrix with its 3x2 transpose, giving a 2x2 result.
torch.matmul(tensor2d, tensor2d.T)

tensor([[14, 32],
        [32, 77]])

# Seeing Models As Computation Graphs

In [12]:
# Forward pass of a one-feature logistic-regression model, written out step by step.
y = torch.tensor([1.0])    # true label
x1 = torch.tensor([1.1])   # input feature
w1 = torch.tensor([2.2])   # weight
# Float bias, matching the other operands (and the gradient-tracking version in the
# next section) instead of an int64 tensor that only works through type promotion.
b = torch.tensor([0.0])

z = x1 * w1 + b            # net input
a = torch.sigmoid(z)       # predicted probability
loss = F.binary_cross_entropy(a, y)
loss

tensor(0.0852)

# Automatic Differentiation Made Easy

In [13]:
# Rebuild the forward pass with w1 and b as gradient-tracking leaf tensors.
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

z = x1 * w1 + b
a = torch.sigmoid(z)
loss = F.binary_cross_entropy(a, y)

# retain_graph=True keeps the computation graph alive so it can be differentiated
# again: once per grad() call here, and once more by the backward() call below.
grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b = grad(loss, b, retain_graph=True)
grad_L_w1

(tensor([-0.0898]),)

In [14]:
# Gradient of the loss with respect to the bias; grad() returns it wrapped in a tuple.
grad_L_b

(tensor([-0.0817]),)

In [15]:
# backward() computes the gradients for every leaf tensor that requires grad (w1 and b)
# and stores them on the tensors' .grad attributes.
loss.backward()
w1.grad

tensor([-0.0898])

In [16]:
# Same value as grad_L_b from the explicit grad() call.
b.grad

tensor([-0.0817])

# Implementing Multilayer NNs

In [17]:
class NeuralNetwork(torch.nn.Module):
    """Multilayer perceptron: Linear -> ReLU hidden blocks plus a linear output layer.

    Args:
        num_inputs: Number of input features.
        num_outputs: Number of output units (one logit per class).
        hidden_sizes: Width of each hidden layer, in order. The default
            ``(30, 20)`` yields the two-hidden-layer architecture
            ``Linear(num_inputs, 30) -> ReLU -> Linear(30, 20) -> ReLU ->
            Linear(20, num_outputs)`` with state-dict keys ``layers.0``,
            ``layers.2`` and ``layers.4``. An empty sequence gives a single
            linear layer.
    """

    def __init__(self, num_inputs, num_outputs, hidden_sizes=(30, 20)):
        super().__init__()
        modules = []
        in_features = num_inputs
        for width in hidden_sizes:
            modules.append(torch.nn.Linear(in_features, width))
            modules.append(torch.nn.ReLU())
            in_features = width
        # No activation after the final layer: forward() returns raw logits.
        modules.append(torch.nn.Linear(in_features, num_outputs))
        self.layers = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Return the raw (un-normalised) logits for the input batch ``x``."""
        logits = self.layers(x)
        return logits
# Instantiate the network for 50 input features and 3 output classes, then show its layers.
model = NeuralNetwork(num_inputs=50, num_outputs=3)
model

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)

In [18]:
# Sum the element counts of every parameter tensor that takes part in gradient updates.
num_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print(f'Total number of trainable model parameters: {num_params}')

Total number of trainable model parameters: 2213


In [19]:
# Weight matrix of the first Linear layer (index 0 of the Sequential container).
model.layers[0].weight

Parameter containing:
tensor([[-0.0516,  0.0729, -0.0633,  ...,  0.0219,  0.0363, -0.0357],
        [-0.0156, -0.1322, -0.1360,  ...,  0.0017,  0.0145, -0.0393],
        [-0.1143,  0.0015, -0.1227,  ...,  0.0843,  0.0757,  0.1254],
        ...,
        [ 0.0274, -0.0193,  0.0603,  ...,  0.0750,  0.0511, -0.0377],
        [ 0.0050,  0.1054, -0.0262,  ..., -0.0438, -0.0287, -0.0311],
        [-0.0299, -0.1240, -0.0007,  ..., -0.0438, -0.0189,  0.0180]],
       requires_grad=True)

In [20]:
# Stored as (out_features, in_features): 30 outputs x 50 inputs.
model.layers[0].weight.shape

torch.Size([30, 50])

In [21]:
# Push one random 50-feature example through the network. Because autograd is
# recording, the result carries a grad_fn.
X = torch.rand(1, 50)
out = model(X)
out

tensor([[-0.2703,  0.2068,  0.2584]], grad_fn=<AddmmBackward0>)

In [22]:
# Inference only: inside no_grad() the same values are produced, but the result has no grad_fn.
with torch.no_grad():
    out=model(X)
out

tensor([[-0.2703,  0.2068,  0.2584]])

In [23]:
# Normalise the logits across the class dimension so each row sums to 1.
with torch.no_grad():
    logits = model(X)
    out = F.softmax(logits, dim=1)
out

tensor([[0.2321, 0.3740, 0.3938]])

# Setting Up Efficient Data Loaders

In [24]:
# Toy two-feature, two-class data: five training examples and two test examples.
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5],
])
y_train = torch.tensor([0, 0, 0, 1, 1])

X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])
y_test = torch.tensor([0, 1])
class ToyDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: Examples, indexable along the first dimension.
        y: Labels, one per example; must expose ``.shape[0]``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of examples.
    """

    def __init__(self, X, y):
        # __len__ is derived from the labels, so a length mismatch would otherwise
        # surface later as an IndexError or as silently unused examples.
        if len(X) != y.shape[0]:
            raise ValueError(
                f'X has {len(X)} examples but y has {y.shape[0]} labels'
            )
        self.features = X
        self.labels = y

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y

    def __len__(self):
        """Return the number of examples (the length of the label tensor)."""
        return self.labels.shape[0]
# Wrap the train and test tensors as datasets; len() reports the number of examples.
train_ds = ToyDataset(X=X_train, y=y_train)
test_ds = ToyDataset(X=X_test, y=y_test)
len(train_ds)

5

In [25]:
# Training batches are reshuffled on every pass (no seed is set, so the order differs
# between runs); test batches keep their original order.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=0,
)
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=2,
    shuffle=False,
    num_workers=0,
)

# Descriptive loop names rather than `x, y`, so this loop does not overwrite the
# notebook-level label tensor `y` defined in the computation-graph section.
# With 5 examples and batch_size=2 the last batch holds a single example.
for batch_number, (batch_features, batch_labels) in enumerate(train_loader, start=1):
    print(f'Batch {batch_number}:\n{batch_features}\n{batch_labels}')

Batch 1:
tensor([[-1.2000,  3.1000],
        [-0.9000,  2.9000]])
tensor([0, 0])
Batch 2:
tensor([[ 2.7000, -1.5000],
        [ 2.3000, -1.1000]])
tensor([1, 1])
Batch 3:
tensor([[-0.5000,  2.6000]])
tensor([0])


In [26]:
# Rebuild the training loader with drop_last=True so the incomplete final batch is
# discarded: every pass now yields 2 full batches, i.e. 4 of the 5 examples.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

# Descriptive loop names rather than `x, y`, so this loop does not overwrite the
# notebook-level label tensor `y` defined in the computation-graph section.
for batch_number, (batch_features, batch_labels) in enumerate(train_loader, start=1):
    print(f'Batch {batch_number}:\n{batch_features}\n{batch_labels}')

Batch 1:
tensor([[-0.9000,  2.9000],
        [-1.2000,  3.1000]])
tensor([0, 0])
Batch 2:
tensor([[-0.5000,  2.6000],
        [ 2.3000, -1.1000]])
tensor([0, 1])


# Typical Training Loop

In [27]:
# Fresh 2-feature, 2-class network trained with plain SGD on the toy data.
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):
        # Clear gradients left from the previous step before computing new ones.
        optimizer.zero_grad()
        logits = model(features)
        loss = F.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()
        # Note: the epoch is logged 1-based while batch_idx is logged 0-based.
        print(f'Epoch: {epoch+1}/{num_epochs} | Batch {batch_idx}/{len(train_loader)} Train/val loss: {loss:.2f}')
    # Placeholder for a per-epoch evaluation step; nothing is evaluated here yet.
    model.eval()

Epoch: 1/3 | Batch 0/2 Train/val loss: 0.70
Epoch: 1/3 | Batch 1/2 Train/val loss: 0.42
Epoch: 2/3 | Batch 0/2 Train/val loss: 0.13
Epoch: 2/3 | Batch 1/2 Train/val loss: 0.31
Epoch: 3/3 | Batch 0/2 Train/val loss: 0.01
Epoch: 3/3 | Batch 1/2 Train/val loss: 0.02


In [28]:
# Switch to evaluation mode before making predictions. eval() returns the module
# itself, which is why the cell displays the layer summary.
model.eval()

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=2, bias=True)
  )
)

In [29]:
# Logits for all five training examples, computed without building an autograd graph.
with torch.no_grad():
    outputs=model(X_train)
outputs

tensor([[ 2.3375, -3.0137],
        [ 2.1916, -2.8197],
        [ 1.9716, -2.5289],
        [-1.8085,  2.1052],
        [-2.1409,  2.5453]])

In [30]:
# Print tensors in fixed-point rather than scientific notation (a global display setting),
# then convert each row of logits into class-membership probabilities.
torch.set_printoptions(sci_mode=False)
probas = outputs.softmax(dim=1)
probas

tensor([[0.9953, 0.0047],
        [0.9934, 0.0066],
        [0.9890, 0.0110],
        [0.0196, 0.9804],
        [0.0091, 0.9909]])

In [31]:
# The predicted class is the column index holding the largest probability in each row.
predictions = probas.argmax(dim=1)
predictions

tensor([0, 0, 0, 1, 1])

In [32]:
# Element-wise comparison with the true labels: True where the prediction is correct.
predictions==y_train

tensor([True, True, True, True, True])

In [33]:
# Summing the boolean matches counts the correct predictions.
(predictions == y_train).sum()

tensor(5)

In [34]:
def compute_accuracy(model, dataloader):
    """Return the fraction of examples in ``dataloader`` that ``model`` classifies correctly.

    Args:
        model: A ``torch.nn.Module`` mapping a feature batch to per-class logits
            (classes along dimension 1).
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        The accuracy as a Python float between 0 and 1.

    Raises:
        ValueError: If ``dataloader`` yields no examples, since accuracy is
            undefined in that case.
    """
    model = model.eval()

    # Evaluate on whichever device the model lives on, so the function also works
    # after model.to(device) even though the loader yields CPU tensors.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct = 0
    total_examples = 0
    with torch.no_grad():
        for features, labels in dataloader:
            if device is not None:
                features, labels = features.to(device), labels.to(device)
            predictions = torch.argmax(model(features), dim=1)
            # Accumulate plain Python ints so the final division is exact.
            correct += (labels == predictions).sum().item()
            total_examples += len(labels)

    if total_examples == 0:
        raise ValueError('dataloader yielded no examples; accuracy is undefined')
    return correct / total_examples
# train_loader was rebuilt with shuffle=True and drop_last=True, so this pass scores
# 4 of the 5 training examples (two full batches of 2).
compute_accuracy(model,train_loader)

1.0

# Saving, Loading Models

In [35]:
# Persist only the learned parameters (the state dict), not the whole module object.
torch.save(model.state_dict(), 'model.pth')

# Loading needs a model with the same architecture to receive the weights.
model = NeuralNetwork(num_inputs=2, num_outputs=2)
model.load_state_dict(torch.load('model.pth', weights_only=True))

<All keys matched successfully>

# Optimizing Training Performance With GPUs
## PyTorch Computations On GPU Devices

In [36]:
# Element-wise addition of two vectors on the default device.
tensor_1 = torch.tensor([1, 2, 3])
tensor_2 = torch.tensor([4, 5, 6])
torch.add(tensor_1, tensor_2)

tensor([5, 7, 9])

In [37]:
# Use the MPS backend when it is available, otherwise fall back to CUDA and then
# the CPU, so the cell also runs on machines without Apple's MPS backend.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Both operands must be on the same device before they can be added.
tensor_1 = tensor_1.to(device)
tensor_2 = tensor_2.to(device)
tensor_1 + tensor_2

tensor([5, 7, 9], device='mps:0')

## Single-GPU Training

In [38]:
# Same training loop as before, run on an accelerator when one is present.
model = NeuralNetwork(num_inputs=2, num_outputs=2)

# Use the MPS backend when it is available, otherwise fall back to CUDA and then
# the CPU, so the cell also runs on machines without Apple's MPS backend.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the parameters first, so the optimizer is built over the on-device tensors.
model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):
        # The loader yields CPU tensors; each batch must follow the model to its device.
        features, labels = features.to(device), labels.to(device)
        logits = model(features)
        loss = F.cross_entropy(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'Epoch: {epoch+1}/{num_epochs} | Batch {batch_idx}/{len(train_loader)} | Train/val loss: {loss:.2f}')
    # Placeholder for a per-epoch evaluation step; nothing is evaluated here yet.
    model.eval()

Epoch: 1/3 | Batch 0/2 | Train/val loss: 0.71
Epoch: 1/3 | Batch 1/2 | Train/val loss: 0.47
Epoch: 2/3 | Batch 0/2 | Train/val loss: 0.00
Epoch: 2/3 | Batch 1/2 | Train/val loss: 0.06
Epoch: 3/3 | Batch 0/2 | Train/val loss: 0.00
Epoch: 3/3 | Batch 1/2 | Train/val loss: 0.05


In [39]:
# Rebinding `model` replaces the network trained above with a fresh, untrained one;
# it is only used here to count the trainable parameters of the 2-input/2-output variant.
model = NeuralNetwork(num_inputs=2, num_outputs=2)
num_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print(f'Total number of trainable model parameters: {num_params}')

Total number of trainable model parameters: 752


In [40]:
# Baseline: time a (100x200) @ (200x300) matrix product before the operands are moved to 'mps'.
# NOTE(review): `a` and `b` rebind the names used for the activation and the bias in
# the computation-graph cells earlier in the notebook.
a=torch.rand(100,200)
b=torch.rand(200,300)
%timeit a@b

17.5 μs ± 118 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [41]:
a,b=a.to('mps'),b.to('mps')
%timeit a@b

22.3 μs ± 1.24 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
