In [1]:
import torch
print(torch.__version__)

2.6.0


In [15]:
# create a 0D tensor (scalar) from a Python integer  
tensor0d = torch.tensor(1)

# create a 1D tensor (vector) from a Python list
tensor1d = torch.tensor([1, 2, 3])

# create a 2D tensor from a nested Python list
tensor2d = torch.tensor([[1, 2, 5], [3, 4, 9]])

# create a 3D tensor from a nested Python list
tensor3d = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

In [3]:
tensor0d

tensor(1)

In [4]:
tensor1d

tensor([1, 2, 3])

In [16]:
tensor2d

tensor([[1, 2, 5],
        [3, 4, 9]])

In [6]:
tensor3d

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [7]:
tensor1d.dtype

torch.int64

In [8]:
floatvec = torch.tensor([9.2, 4.9, 1.3])

In [9]:
floatvec

tensor([9.2000, 4.9000, 1.3000])

In [10]:
floatvec.dtype

torch.float32

In [11]:
tensor1d.to(torch.float32)

tensor([1., 2., 3.])

In [20]:
tensor2d

tensor([[1, 2, 5],
        [3, 4, 9]])

In [27]:
# reshape tensor
tensor2d.reshape(3,2)

tensor([[1, 2],
        [5, 3],
        [4, 9]])

In [29]:
# another way to reshape tensor (more commonly used)
tensor2d.view(3,2)

tensor([[1, 2],
        [5, 3],
        [4, 9]])

In [31]:
# reshape does not affect original tensor
tensor2d

tensor([[1, 2, 5],
        [3, 4, 9]])

In [33]:
# flatten a tensor
tensor2d.flatten()

tensor([1, 2, 5, 3, 4, 9])

In [34]:
# transpose of a tensor
tensor2d.T

tensor([[1, 3],
        [2, 4],
        [5, 9]])

In [24]:
# matrix multiplication
tensor2d.matmul(tensor2d.T)

tensor([[ 30,  56],
        [ 56, 106]])

In [26]:
# another way of matrix multiplication
tensor2d @ tensor2d.T

tensor([[ 30,  56],
        [ 56, 106]])

### PyTorch's automatic differentiation engine (autograd)

In [37]:
# autograd example
x = torch.tensor(2.0, requires_grad=True) # requires_grad asks PyTorch to track gradient through operations involving x
y = x**2 + 3*x + 1 
y.backward() # compute derivative of y with respect to x: 2*x+3
x.grad # calculate the gradient

tensor(7.)

In [94]:
# an example of a logistic regression 
# partial derivative: the rate at which a function changes w.r.t. one variable
# gradient: a vector of all partial derivatives

import torch.nn.functional as F
from torch.autograd import grad # compute gradients manually

y = torch.tensor([1.0]) # target label
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

z = x1 * w1 + b
a = torch.sigmoid(z) # activation & output

loss = F.binary_cross_entropy(a, y)

In [78]:
# manual gradient calculation
# pytorch uses autograd for this via chain rule
grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b = grad(loss, b, retain_graph=True)

print(grad_L_w1)
print(grad_L_b)

(tensor([-0.0898]),)
(tensor([-0.0817]),)


In [97]:
loss.backward(retain_graph=True) # gradients will be accumulated if rerunning this
print(w1.grad)
print(b.grad)

tensor([-0.2695])
tensor([-0.2450])


### implement a neural network module

In [98]:
class NeuralNetwork(torch.nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.layers = torch.nn.Sequential(

            # 1st hidden layer
            torch.nn.Linear(num_inputs, 30),
            torch.nn.ReLU(),

            # 2nd hidden layer
            torch.nn.Linear(30, 20),
            torch.nn.ReLU(),

            # output layer
            torch.nn.Linear(20, num_outputs),
        )

    def forward(self, x):
        logits = self.layers(x)
        return logits

In [99]:
model = NeuralNetwork(50, 3)

In [100]:
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [107]:
# total number of parameters
sum(p.numel() for p in model.parameters())

2213

In [106]:
# total number of trainable parameters
# part of the model may be frozen for some reason
sum(p.numel() for p in model.parameters() if p.requires_grad)

2213

In [111]:
for name, param in model.named_parameters():
    print(f"{name}: {param.numel()} parameters")

layers.0.weight: 1500 parameters
layers.0.bias: 30 parameters
layers.2.weight: 600 parameters
layers.2.bias: 20 parameters
layers.4.weight: 60 parameters
layers.4.bias: 3 parameters


In [112]:
model.layers

Sequential(
  (0): Linear(in_features=50, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=3, bias=True)
)

In [117]:
# each neuron is placed in a row to get dot product with the input features
# so for weights: # of rows = # of neurons
# but to do matrix multiplication, use transpose of weight
model.layers[0].weight.shape

torch.Size([30, 50])

In [118]:
model.layers[0].bias.shape

torch.Size([30])

In [119]:
# if we want to reproduce the random initial values
torch.manual_seed(123)
model = NeuralNetwork(50, 3)
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [120]:
torch.manual_seed(123)
X = torch.rand(1, 50)
out = model(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)


In [123]:
# during inference (making predictions)
# use this context manager to tell PyTorch not to track gradients
with torch.no_grad():
    out = torch.softmax(model(X), dim=1)
print(out)

tensor([[0.3113, 0.3934, 0.2952]])


In [129]:
# create toy datasets
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5], # note the use of trailing comma
])
y_train = torch.tensor([0, 0, 0, 1, 1]) # PyTorch requires class labels to start from 0
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])
y_test = torch.tensor([0, 1])


In [158]:
from torch.utils.data import Dataset # standard interface to use with DataLoader

class ToyDataset(Dataset):
    def __init__(self, X, y):
        self.features = X
        self.labels = y
    def __getitem__(self, index):
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y
    def __len__(self):
        return self.labels.shape[0]

train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [166]:
from torch.utils.data import DataLoader
torch.manual_seed(123) # note the scope of this random seed only affects the initial random state

train_loader = DataLoader(
    dataset = train_ds,
    batch_size=2, # how many samples per batch
    shuffle=True, # shuffle the dataset each epoch
    num_workers=0, # to use multiple cpu workers to prepare data
    drop_last=True,
)

test_loader = DataLoader(
    dataset=test_ds,
    batch_size=2,
    shuffle=True,
    num_workers=0,
)

In [167]:
for idx, (x, y) in enumerate(train_loader): # note rerunning this will give different results due to shuffle=True
    print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])


### A typical training loop

In [181]:
import torch.nn.functional as F

torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

num_epochs = 3

for epoch in range(num_epochs):

    model.train() # more useful when having dropout, batch normalization layers
    for batch_idx, (features, labels) in enumerate(train_loader):
        
        logits = model(features)
        loss = F.cross_entropy(logits, labels)
        optimizer.zero_grad() # important to reset gradient to zero (default is to accumulate gradients)
        loss.backward()
        optimizer.step()

        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
              f" | Batch {batch_idx+1:03d}/{len(train_loader):03d}"
              f" | Train/Val loss: {loss:.2f}")

    model.eval()


Epoch: 001/003 | Batch 001/002 | Train/Val loss: 0.75
Epoch: 001/003 | Batch 002/002 | Train/Val loss: 0.65
Epoch: 002/003 | Batch 001/002 | Train/Val loss: 0.44
Epoch: 002/003 | Batch 002/002 | Train/Val loss: 0.13
Epoch: 003/003 | Batch 001/002 | Train/Val loss: 0.03
Epoch: 003/003 | Batch 002/002 | Train/Val loss: 0.00


In [183]:
with torch.no_grad():
    outputs = model(X_train)
print(outputs)

probas = torch.softmax(outputs, dim=1)
print(probas)

predictions = torch.argmax(probas, dim=1)
print(predictions)


tensor([[ 2.8569, -4.1618],
        [ 2.5382, -3.7548],
        [ 2.0944, -3.1820],
        [-1.4814,  1.4816],
        [-1.7176,  1.7342]])
tensor([[9.9911e-01, 8.9419e-04],
        [9.9815e-01, 1.8458e-03],
        [9.9491e-01, 5.0852e-03],
        [4.9127e-02, 9.5087e-01],
        [3.0714e-02, 9.6929e-01]])
tensor([0, 0, 0, 1, 1])


In [186]:
# how to save a model to reuse it later?
torch.save(model.state_dict(), "model.pth") # save model trained parameters (pth stands for pytorch)

In [185]:
# load the model
model = NeuralNetwork(2, 2)
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>