## Introduction to PyTorch

### Installing PyTorch

In [1]:
import torch

def to_onehot(y, num_classes):
    """Convert a vector of integer class labels into a one-hot matrix.

    Args:
        y: Tensor holding one class index per example. Its first dimension
            gives the number of rows and it is reshaped to a column with
            ``view(-1, 1)``.
        num_classes: Number of columns (distinct classes) in the result.

    Returns:
        A ``(y.size(0), num_classes)`` tensor of the default floating dtype,
        allocated on the same device as ``y``, with a 1 in each row at the
        column named by the label and 0 elsewhere.
    """
    # torch.zeros already yields a floating-point tensor, so no cast is needed.
    # Allocating on y's device keeps scatter_ valid for non-CPU labels.
    y_onehot = torch.zeros(y.size(0), num_classes, device=y.device)
    # scatter_ writes in place and requires int64 indices.
    y_onehot.scatter_(1, y.view(-1, 1).long(), 1)
    return y_onehot

# Four example labels drawn from three classes (0, 1 and 2).
y = torch.tensor([0, 1, 2, 2])
y_enc = to_onehot(y, num_classes=3)

print('one-hot encoding:\n', y_enc)

  from .autonotebook import tqdm as notebook_tqdm
one-hot encoding:
 tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.]])


### Understanding tensors

In [4]:
import torch

# Build tensors of rank 0 through 3 from plain Python values.
tensor0d = torch.tensor(1)
tensor1d = torch.tensor([1, 2, 3])
tensor2d = torch.tensor([
    [1.0, 2.0, 3.0],
    [4.0, 5.0, 6.0],
])
tensor3d = torch.tensor([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])

# Contents and shape of the rank-3 tensor.
print(tensor3d)
print(tensor3d.shape)

# Integer literals produce int64 and float literals float32; .to() converts.
print(tensor1d.dtype)
print(tensor2d.dtype)
print(tensor3d.to(torch.float32).dtype)

# Transpose, then multiply the 2x3 matrix by its 3x2 transpose.
print(tensor2d.T)
print(tensor2d.matmul(tensor2d.T))

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])
torch.Size([2, 2, 3])
torch.int64
torch.float32
torch.float32
tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])
tensor([[14., 32.],
        [32., 77.]])


### Computing gradients via autograd

In [13]:
import torch
import torch.nn.functional as F
from torch.autograd import grad

# One training example: target label and a single input feature.
y = torch.tensor([1.0])
x1 = torch.tensor([1.1])

# Trainable parameters; requires_grad=True makes autograd track them.
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

# Forward pass of a one-feature logistic regression.
z = x1 * w1 + b
a = z.sigmoid()
loss = F.binary_cross_entropy(a, y)

# The gradients could also be requested one tensor at a time:
#   grad(loss, w1, retain_graph=True)
#   grad(loss, b, retain_graph=True)

# backward() stores the gradient of the loss in .grad of each tracked leaf.
loss.backward()
print(w1.grad)
print(b.grad)

tensor([-0.0898])
tensor([-0.0817])


### Implementing multilayer neural networks

In [21]:
import torch

class NeuralNetwork(torch.nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    The hidden layers have 30 and 20 units; the final linear layer emits one
    raw score (logit) per output.
    """

    def __init__(self, num_inputs, num_outputs):
        """Create the layer stack.

        Args:
            num_inputs: Number of features in each input row.
            num_outputs: Number of logits produced per input row.
        """
        super().__init__()
        nn = torch.nn
        self.layers = nn.Sequential(
            nn.Linear(num_inputs, 30),   # first hidden layer
            nn.ReLU(),
            nn.Linear(30, 20),           # second hidden layer
            nn.ReLU(),
            nn.Linear(20, num_outputs),  # output layer
        )

    def forward(self, x):
        """Return the logits for ``x``; no softmax is applied."""
        return self.layers(x)

# Seed the global RNG so the random weight initialization below is repeatable.
torch.manual_seed(42)
model = NeuralNetwork(num_inputs=50, num_outputs=3)
# Printing a module lists its registered submodules.
print(model)

# Run one random 50-feature example through the untrained network.
X = torch.rand((1, 50))
out = model(X)
print(out)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=30, bias=True)
    (5): ReLU()
    (6): Linear(in_features=30, out_features=20, bias=True)
    (7): ReLU()
    (8): Linear(in_features=20, out_features=3, bias=True)
  )
)
tensor([[-0.0926,  0.0474,  0.0505]], grad_fn=<AddmmBackward0>)


### Setting up efficient data loaders

In [31]:
import torch
from torch.utils.data import Dataset, DataLoader
# Five 2-feature training examples: three of class 0, then two of class 1.
X_train = torch.tensor([
    [-1.2, 3.1], [-0.9, 2.9], [-0.5, 2.6],
    [2.3, -1.1], [2.7, -1.5],
])
y_train = torch.tensor([0, 0, 0, 1, 1])

# Two held-out examples, one per class.
X_test = torch.tensor([[-0.8, 2.8], [2.6, -1.6]])
y_test = torch.tensor([0, 1])


class ToyDataset(Dataset):
    """Map-style dataset pairing each feature row with its class label."""

    def __init__(self, X, y):
        """Store the feature rows ``X`` and their labels ``y``."""
        self.features, self.labels = X, y

    def __len__(self):
        """Return the number of examples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

# Wrap each split in a dataset object so a DataLoader can index it.
train_dataset = ToyDataset(X_train, y_train)
test_dataset = ToyDataset(X_test, y_test)

# Indexing a dataset returns one (features, label) pair.
print(train_dataset[0])

# batch_size=2 with drop_last=True discards the final incomplete batch, so the
# five training examples yield two batches per pass. shuffle=True reorders the
# examples on every pass; num_workers=0 loads the data in the main process.
train_data_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    drop_last=True
)
# NOTE(review): shuffle/drop_last are normally not needed for evaluation data;
# confirm they are intended here.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=0,
    drop_last=True
)


# One pass over the training loader, printing the features and labels of each batch.
for index, (X, y) in enumerate(train_data_loader):
    print(X)
    print(y)

(tensor([-1.2000,  3.1000]), tensor(0))
tensor([[ 2.7000, -1.5000],
        [-0.9000,  2.9000]])
tensor([1, 0])
tensor([[-0.5000,  2.6000],
        [ 2.3000, -1.1000]])
tensor([0, 1])


### A typical training loop

In [19]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

class NeuralNetwork(torch.nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    The hidden layers have 30 and 20 units; the final linear layer emits one
    raw score (logit) per output.
    """

    def __init__(self, num_inputs, num_outputs):
        """Create the layer stack.

        Args:
            num_inputs: Number of features in each input row.
            num_outputs: Number of logits produced per input row.
        """
        super().__init__()
        nn = torch.nn
        self.layers = nn.Sequential(
            nn.Linear(num_inputs, 30),   # first hidden layer
            nn.ReLU(),
            nn.Linear(30, 20),           # second hidden layer
            nn.ReLU(),
            nn.Linear(20, num_outputs),  # output layer
        )

    def forward(self, x):
        """Return the logits for ``x``; no softmax is applied."""
        return self.layers(x)

class ToyDataset(Dataset):
    """Map-style dataset pairing each feature row with its class label."""

    def __init__(self, X, y):
        """Store the feature rows ``X`` and their labels ``y``."""
        self.features, self.labels = X, y

    def __len__(self):
        """Return the number of examples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

def compute_accuracy(model, data_loader):
    """Return the fraction of examples in ``data_loader`` that ``model`` classifies correctly.

    Args:
        model: Module called as ``model(features)`` to produce one row of
            class scores per example. It is switched to evaluation mode and
            left in that mode.
        data_loader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        The accuracy as a Python float in ``[0, 1]``, or ``nan`` when the
        loader yields no examples (for instance ``drop_last=True`` with a
        batch size larger than the dataset).
    """
    model.eval()
    correct = 0
    total_examples = 0
    # Inference only: skip autograd bookkeeping for the whole pass.
    with torch.no_grad():
        for features, labels in data_loader:
            # The predicted class is the index of the largest score in each row.
            predictions = torch.argmax(model(features), dim=1)
            matches = predictions == labels
            correct += int(matches.sum())
            total_examples += len(matches)
    if total_examples == 0:
        # Accuracy is undefined without examples; avoid dividing by zero.
        return float("nan")
    return correct / total_examples
# Fix the global seed before anything random happens in this cell.
torch.manual_seed(42)

# Training split: five 2-feature examples, three of class 0 then two of class 1.
X_train = torch.tensor([
    [-1.2, 3.1], [-0.9, 2.9], [-0.5, 2.6],
    [2.3, -1.1], [2.7, -1.5],
])
y_train = torch.tensor([0, 0, 0, 1, 1])

# Test split: two held-out examples, one per class.
X_test = torch.tensor([[-0.8, 2.8], [2.6, -1.6]])
y_test = torch.tensor([0, 1])

# Wrap the tensors so a DataLoader can index individual (features, label) pairs.
train_dataset = ToyDataset(X_train, y_train)
test_dataset = ToyDataset(X_test, y_test)

# Training batches: two shuffled examples each; drop_last discards the
# leftover incomplete batch.
train_data_loader = DataLoader(
    train_dataset, batch_size=2, shuffle=True, num_workers=0, drop_last=True
)
# Test batches: one example at a time, otherwise the same loader options.
test_data_loader = DataLoader(
    test_dataset, batch_size=1, shuffle=True, num_workers=0, drop_last=True
)

# Two input features and two output logits (one per class label).
model = NeuralNetwork(num_inputs=2, num_outputs=2)
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

max_epoch = 3

for epoch in range(max_epoch):
    # Put the model in training mode at the start of every epoch.
    model.train()
    for batch_index, (features, labels) in enumerate(train_data_loader):
        # Compute prediction
        logits = model(features)
        # cross_entropy takes the raw logits and integer class labels.
        loss = F.cross_entropy(logits, labels)
        # Backpropagation
        # Clear gradients left from the previous step before computing new ones.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters from the freshly computed gradients.
        optimizer.step()
        print(f"Epoch: {epoch+1:03d}/{max_epoch:03d}"
              f" | Batch: {batch_index+1:03d}/{len(train_data_loader):03d}"
              f" | Loss: {loss:.4f}"
        )
# Switch to evaluation mode now that training has finished.
model.eval()

# Manual alternative to compute_accuracy: inspect the raw test predictions.
# with torch.no_grad():
#     outputs = model(X_test)
# print(outputs)
# torch.set_printoptions(sci_mode=False)
# # probas = torch.softmax(outputs, dim=1)
# # print(probas)
# # predictions = torch.argmax(probas, dim=1)
# # print(predictions)
# predictions = torch.argmax(outputs, dim=1)
# print(predictions)

# Accuracy of the freshly trained model on both loaders.
# NOTE(review): train_data_loader uses drop_last=True, so this pass covers
# only the complete batches of the training set.
print(compute_accuracy(model, train_data_loader))
print(compute_accuracy(model, test_data_loader))

# Persist only the learned parameters (the state dict), not the module object.
torch.save(model.state_dict(), "model.pth")

# Rebuild the same architecture and restore the saved parameters.
# weights_only=True restricts unpickling to tensors and plain containers, so
# loading the file cannot execute arbitrary pickled code.
model = NeuralNetwork(2,2)
model.load_state_dict(torch.load("model.pth", weights_only=True))

# The restored model is evaluated the same way for comparison with the above.
print(compute_accuracy(model, train_data_loader))
print(compute_accuracy(model, test_data_loader))

Epoch: 001/003 | Batch: 001/002 | Loss: 0.6117
Epoch: 001/003 | Batch: 002/002 | Loss: 0.3457
Epoch: 002/003 | Batch: 001/002 | Loss: 0.2022
Epoch: 002/003 | Batch: 002/002 | Loss: 0.0770
Epoch: 003/003 | Batch: 001/002 | Loss: 0.0348
Epoch: 003/003 | Batch: 002/002 | Loss: 0.0147
1.0
1.0
1.0
1.0


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=968d3c27-50e7-4d42-bdd9-442f6904c1c2' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>