In [1]:
import torch
import numpy as np

In [2]:
# Trainable parameters of the toy model y = w * x + b; both are flagged so
# autograd tracks the operations applied to them.
w = torch.tensor([2.0]).requires_grad_(True)
b = torch.tensor([1.0]).requires_grad_(True)

# Fixed input sample (no gradient tracking requested)
x = torch.tensor([3.0])

In [3]:
# Forward pass of the toy linear model
y_pred = w * x + b
# Use an f-string: braces outside an f-string build a one-element set, which
# made this cell print "{7.0}" instead of the plain scalar.
print(f"y_pred: {y_pred.item()}")

y_pred: 7.0


In [4]:
# Squared-error loss between the prediction and the target value
y_true = torch.tensor([10.0])
loss = (y_pred - y_true) ** 2
# Use an f-string: braces outside an f-string build a one-element set, which
# made this cell print "{9.0}" instead of the plain scalar.
print(f"Loss: {loss.item()}")

Loss: 9.0


In [5]:
# Before any backward pass, no gradient has been stored on w yet
print("Avant backward: w.grad =", w.grad)

Avant backward: w.grad = None


In [6]:
# Backpropagate from the loss; this is what fills in w.grad and b.grad
loss.backward()

In [7]:
# Show the gradients that the backward pass stored on each parameter
print("Après backward: w.grad =", w.grad)
print("Apres backward: b.grad =", b.grad)

Après backward: w.grad = tensor([-18.])
Apres backward: b.grad = tensor([-6.])


In [8]:
# One manual gradient-descent step on w and b
learning_rate = 0.01
with torch.no_grad(): # Disable gradient tracking for the in-place update: w <- w - learning_rate * w.grad
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

In [9]:
# Show the parameters after the manual update
print("Nouveau poids : w =", w, "b =", b)

Nouveau poids : w = tensor([2.1800], requires_grad=True) b = tensor([1.0600], requires_grad=True)


In [10]:
# Reset the gradients in place so the next backward pass does not add to them
w.grad.zero_()
b.grad.zero_()
print("Après zero_(): w.grad =", w.grad, "b.grad =", b.grad)

Après zero_(): w.grad = tensor([0.]) b.grad = tensor([0.])


In [11]:
import torch.nn as nn

# Single linear unit: one input feature, one output feature
perceptron = nn.Linear(1, 1)

# Inspect the initial weight and bias
print("Poids :", perceptron.weight)
print("Biais :", perceptron.bias)

Poids : Parameter containing:
tensor([[-0.5861]], requires_grad=True)
Biais : Parameter containing:
tensor([-0.7771], requires_grad=True)


In [12]:
# Forward pass through the perceptron
x = torch.tensor([3.0]) # Shape (1,): a single unbatched sample with one feature, not (batch_size, features)
print(f"input : {x}")
y_pred = perceptron(x)
print(y_pred)


input : tensor([3.])
tensor([-2.5355], grad_fn=<ViewBackward0>)


In [13]:
# Mean-squared-error loss between the perceptron output and the target
y_true = torch.tensor([10.0])
criterion = nn.MSELoss()
loss = criterion(y_pred, y_true)

In [14]:
# Display the loss tensor (bare last expression, rendered by the notebook)
loss

tensor(157.1384, grad_fn=<MseLossBackward0>)

In [15]:
# Backward pass: compute the gradients of the loss for the perceptron's weight and bias
loss.backward()

In [16]:
# Show the gradients stored on the perceptron's weight and bias
print(f"Gradient de poids : {perceptron.weight.grad}")
print(f"Gradient de biais : {perceptron.bias.grad}")

Gradient de poids : tensor([[-75.2129]])
Gradient de biais : tensor([-25.0710])


In [17]:
import torch.optim as optim
# SGD optimizer over the perceptron's parameters with learning rate 0.01
optimizer = optim.SGD(perceptron.parameters(), lr=0.01)

In [18]:
# Apply one optimizer update using the gradients currently stored on the parameters
optimizer.step()

In [19]:
# Show the weight and bias after the optimizer step
print("Nouveau poids :", perceptron.weight)
print("Nouveau biais :", perceptron.bias)

Nouveau poids : Parameter containing:
tensor([[0.1660]], requires_grad=True)
Nouveau biais : Parameter containing:
tensor([-0.5264], requires_grad=True)


In [20]:
# Clear the stored gradients before any further backward pass
optimizer.zero_grad()

In [21]:
# Check what the gradient attributes hold after zero_grad()
print(f"Poids après zero_grad : {perceptron.weight.grad}")
print(f"Biais après zero_grad : {perceptron.bias.grad}")

Poids après zero_grad : None
Biais après zero_grad : None


In [22]:
class SimpleNet(nn.Module):
    """Small fully connected network mapping 2 -> 4 -> 3 -> 1 features.

    ReLU follows each of the first two linear layers and a sigmoid is applied
    to the output of the last one.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the same order (fc1, fc2, fc3, relu,
        # sigmoid), which is the order they are listed when the model is printed.
        self.fc1 = nn.Linear(in_features=2, out_features=4)
        self.fc2 = nn.Linear(in_features=4, out_features=3)
        self.fc3 = nn.Linear(in_features=3, out_features=1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for ``x`` (last dimension of size 2)."""
        hidden_first = self.relu(self.fc1(x))
        hidden_second = self.relu(self.fc2(hidden_first))
        return self.sigmoid(self.fc3(hidden_second))

In [23]:
# Instantiate the small fully connected network defined in the previous cell
model = SimpleNet()

In [24]:
# Display the module summary (bare last expression, rendered by the notebook)
model

SimpleNet(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)

In [25]:
# One forward/backward pass through SimpleNet on a single two-feature sample
x = torch.tensor([1.0, 2.0])
# NOTE(review): the model ends with a sigmoid, so its output stays in (0, 1)
# and can never match a target of 5.0 — confirm this target is intentional.
y = torch.tensor([5.0])
y_pred = model(x)
loss = nn.MSELoss()(y_pred, y)

# Backpropagate the loss through the model
loss.backward()

In [26]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Compose, Normalize
import matplotlib.pyplot as plt


In [27]:
# Both MNIST splits use the same storage directory, download flag and tensor
# conversion; only the `train` flag differs between them.
mnist_options = dict(root='data', download=True, transform=ToTensor())

training_data = datasets.MNIST(train=True, **mnist_options)
test_data = datasets.MNIST(train=False, **mnist_options)



In [28]:
# Settings common to both loaders; only the training loader shuffles its samples.
loader_options = dict(batch_size=64, pin_memory=True, num_workers=4)

training_dataloader = DataLoader(training_data, shuffle=True, **loader_options)
test_dataloader = DataLoader(test_data, shuffle=False, **loader_options)

In [29]:
# Show the class labels and the size of the raw training image tensor
print(training_data.classes)
print(training_data.data.size())

['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
torch.Size([60000, 28, 28])


In [30]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier producing 10 raw logits from 28x28 inputs.

    The input is flattened to 784 features and passed through
    784 -> 512 -> 128 -> 10 linear layers with a ReLU after each of the first
    two. No softmax is applied here; the output is unnormalised.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        stack = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        self.fc = nn.Sequential(*stack)

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the linear stack."""
        return self.fc(self.flatten(x))

In [31]:
# Use the current accelerator's device type when one is available, else the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"

# Build the classifier and move it to the selected device.
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [32]:
# Random 28x28 input with a leading batch dimension of 1, created on the selected device
x = torch.rand(1, 28, 28, device=device)
logits = model(x)

In [33]:
# Display the raw model outputs (one value per class)
logits

tensor([[ 0.0437,  0.0336, -0.0661, -0.0318, -0.0630,  0.0679, -0.0512, -0.0527,
          0.1039, -0.1335]], grad_fn=<AddmmBackward0>)

In [34]:
# Apply softmax along dim 1 to turn the logits into per-class probabilities
pred_probab = nn.Softmax(dim=1)(logits)
pred_probab

tensor([[0.1058, 0.1047, 0.0948, 0.0981, 0.0951, 0.1084, 0.0962, 0.0961, 0.1123,
         0.0886]], grad_fn=<SoftmaxBackward0>)

In [35]:
# Predicted class = index of the largest probability along dim 1
y_pred = pred_probab.argmax(dim=1)
y_pred


tensor([8])

In [55]:
# Cross-entropy loss and an AdamW optimizer (learning rate 1e-3) over the model's parameters
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

In [56]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches; it must expose a
            ``dataset`` attribute supporting ``len()`` (used as the progress total).
        model: module to optimise; it is switched to training mode.
        loss_fn: callable taking ``(predictions, targets)`` and returning a
            loss on which ``backward()`` can be called.
        optimizer: object whose ``step()`` and ``zero_grad()`` are called once per batch.

    Each batch is moved to the notebook-level ``device`` before the forward
    pass. The loss of every 100th batch, starting with the first, is printed.
    """
    size = len(dataloader.dataset)
    model.train()

    for step, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        batch_loss = loss_fn(model(inputs), targets)

        # Backward pass, parameter update, then clear gradients for the next batch
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Only report on batches 0, 100, 200, ...
        if step % 100 != 0:
            continue
        loss = batch_loss.item()
        current = (step + 1) * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5}]")


In [57]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
            

In [58]:
# Run `epochs` rounds, each made of one training pass followed by one evaluation pass
epochs = 30
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(training_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------




loss: 0.257722  [   64/60000]
loss: 0.317461  [ 6464/60000]
loss: 0.289523  [12864/60000]
loss: 0.095424  [19264/60000]
loss: 0.110563  [25664/60000]
loss: 0.138368  [32064/60000]
loss: 0.066612  [38464/60000]
loss: 0.087207  [44864/60000]
loss: 0.166682  [51264/60000]
loss: 0.089349  [57664/60000]
Test Error: 
 Accuracy: 96.9%, Avg loss: 0.102443 

Epoch 2
-------------------------------
loss: 0.040872  [   64/60000]
loss: 0.054087  [ 6464/60000]
loss: 0.030931  [12864/60000]
loss: 0.076550  [19264/60000]
loss: 0.044052  [25664/60000]
loss: 0.020273  [32064/60000]
loss: 0.019043  [38464/60000]
loss: 0.078896  [44864/60000]
loss: 0.081676  [51264/60000]
loss: 0.018332  [57664/60000]
Test Error: 
 Accuracy: 97.6%, Avg loss: 0.078491 

Epoch 3
-------------------------------
loss: 0.012038  [   64/60000]
loss: 0.083925  [ 6464/60000]
loss: 0.016305  [12864/60000]
loss: 0.027948  [19264/60000]
loss: 0.037500  [25664/60000]
loss: 0.023734  [32064/60000]
loss: 0.190250  [38464/60000]
loss: 

KeyboardInterrupt: 

In [59]:
# Example input passed to the exporter: a tuple holding one random 1x1x28x28 tensor
example_inputs = (torch.randn(1, 1, 28, 28),)
# Move the model to the CPU, where the example input above was created
model.to('cpu')
# Export with the dynamo-based exporter; the result is saved to disk in a later cell
onnx_program = torch.onnx.export(model, example_inputs, dynamo=True)

[torch.onnx] Obtain model graph for `NeuralNetwork([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `NeuralNetwork([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅


In [61]:
# Write the exported program to model.onnx (path relative to the working directory)
onnx_program.save("model.onnx")

In [64]:
# Inspect pixel values sent to the model (black vs white)

# One index shared by both code paths. Previously the fallback read sample 0
# while the normal path read sample 5, so the statistics described a different
# image depending on whether `training_data` happened to be defined.
SAMPLE_INDEX = 5

try:
    sample_img, _ = training_data[SAMPLE_INDEX]
except NameError:
    # `training_data` is not defined in this kernel: load the MNIST training
    # split again, converting images to tensors, and read the same sample.
    from torchvision import datasets, transforms
    fallback_data = datasets.MNIST(root='data', train=True, download=True, transform=transforms.ToTensor())
    sample_img, _ = fallback_data[SAMPLE_INDEX]

img = sample_img.squeeze()  # drop size-1 dimensions -> (28, 28)
flat = img.flatten()

# Value range and average intensity of the image
min_val = flat.min().item()
max_val = flat.max().item()
mean_val = flat.mean().item()

# Count of exactly-zero pixels versus everything else
zeros = (flat == 0).sum().item()
nonzeros = flat.numel() - zeros

# Top and bottom pixel rows as plain Python lists
first_row = img[0].tolist()
last_row = img[-1].tolist()

print({
    'min': min_val,
    'max': max_val,
    'mean': mean_val,
    'zeros': zeros,
    'nonzeros': nonzeros,
    'first_row': first_row,
    'last_row': last_row,
})

{'min': 0.0, 'max': 1.0, 'mean': 0.14806422591209412, 'zeros': 596, 'nonzeros': 188, 'first_row': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 'last_row': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
