In [43]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Compose, Normalize
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import datetime

In [44]:
# Input normalisation: convert each image to a tensor, then shift and scale
# its single channel with mean 0.5 and standard deviation 0.5.
transform = Compose([ToTensor(), Normalize((0.5,), (0.5,))])

# Build the MNIST training split first, then the test split. Both are stored
# under ./data (downloaded when missing) and share the same transform.
training_data, test_data = (
    datasets.MNIST(root="data", train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [45]:
# Both loaders use batches of 64, pinned host memory and four worker
# processes. Only the training loader shuffles its samples.
training_dataloader = DataLoader(
    training_data, batch_size=64, shuffle=True, num_workers=4, pin_memory=True
)
test_dataloader = DataLoader(
    test_data, batch_size=64, shuffle=False, num_workers=4, pin_memory=True
)

In [46]:
class NeuralNetwork(nn.Module):
    """Small convolutional classifier that maps 1x28x28 images to 10 logits.

    Layout:
        conv:    two stages of Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU
                 -> MaxPool2d(2), taking 1x28x28 -> 32x14x14 -> 64x7x7.
        flatten: collapses the 64x7x7 feature map into a 3136-wide vector.
        fc:      Linear(3136, 128) -> BatchNorm1d -> ReLU -> Dropout(0.3)
                 -> Linear(128, 10).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. Layers are appended in the order
        # they are applied, so the Sequential indices run 0..7.
        conv_layers = []
        for in_channels, out_channels in ((1, 32), (32, 64)):
            conv_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2),  # halves height and width
            ]
        self.conv = nn.Sequential(*conv_layers)

        # Fully connected classification head.
        self.flatten = nn.Flatten()
        hidden_units = 128
        self.fc = nn.Sequential(
            nn.Linear(64 * 7 * 7, hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_units, 10),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        features = self.flatten(self.conv(x))
        return self.fc(features)

In [47]:
# Use the current accelerator's device type when one is available;
# otherwise fall back to the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"

# Instantiate the network on the selected device and show its layer layout.
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (conv): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [48]:
# Objective and optimiser: cross-entropy on the logits, SGD with lr=1e-3.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# TensorBoard: every run logs into its own timestamped directory under runs/.
log_dir = "runs/mnist_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(log_dir)
print(f"TensorBoard logs: {log_dir}")

# Record the model graph using a single random 1x28x28 input.
dummy_input = torch.randn(1, 1, 28, 28).to(device)
writer.add_graph(model, dummy_input)

# Log a preview of the first 16 images of one training batch.
# The input pipeline applies Normalize((0.5,), (0.5,)), so pixel values lie in
# [-1, 1], whereas add_images expects float images in [0, 1]. Undo the
# normalisation (x * std + mean) so the dark pixels are not clipped away.
dataiter = iter(training_dataloader)
images, labels = next(dataiter)
writer.add_images('mnist_examples', (images[:16] * 0.5 + 0.5).clamp(0, 1), 0)

TensorBoard logs: runs/mnist_20251211-141743


In [49]:
def train(dataloader, model, loss_fn, optimizer, epoch, writer):
    """Run one training epoch and log its metrics to TensorBoard.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches that exposes
            ``.dataset`` and supports ``len()``.
        model: Network to optimise; it is switched to training mode.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimizer bound to the parameters of ``model``.
        epoch: Zero-based epoch index, used as the TensorBoard step.
        writer: Object providing ``add_scalar(tag, value, step)``.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the fraction (0..1) of correctly classified samples.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)

    # Send batches to wherever the model currently lives instead of relying on
    # a module-level ``device`` that can go stale (e.g. after model.to("cpu")).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    running_loss = 0.0
    correct = 0

    for batch_idx, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Forward pass
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass. Gradients are cleared *before* backward() rather than
        # after step(), so the gradients of the last batch are still attached
        # to the parameters when the epoch ends and can be inspected or logged
        # by the caller. The optimisation itself is unchanged.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_val = loss.item()
        running_loss += loss_val
        correct += (pred.argmax(1) == y).sum().item()

        if batch_idx % 100 == 0:
            current = (batch_idx + 1) * len(X)
            print(f"loss: {loss_val:>7f}  [{current:>5d}/{size:>5}]")
            # Log the batch loss against a step that keeps increasing across epochs.
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar('Loss/train_batch', loss_val, global_step)

    # Log the epoch-level average loss and accuracy.
    avg_loss = running_loss / num_batches
    accuracy = correct / size
    writer.add_scalar('Loss/train_epoch', avg_loss, epoch)
    writer.add_scalar('Accuracy/train', 100 * accuracy, epoch)

    print(f"Train Accuracy: {(100*accuracy):>0.1f}%")
    return avg_loss, accuracy

In [50]:
def test(dataloader, model, loss_fn, epoch, writer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    accuracy = correct / size
    
    # Log TensorBoard
    writer.add_scalar('Loss/test', test_loss, epoch)
    writer.add_scalar('Accuracy/test', 100 * accuracy, epoch)
    
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss, accuracy

In [51]:
# Train for a fixed number of epochs, evaluating on the test split after each one.
epochs = 10
try:
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train(training_dataloader, model, loss_fn, optimizer, t, writer)
        test(test_dataloader, model, loss_fn, t, writer)

        # Log histograms of the weights once per epoch.
        for name, param in model.named_parameters():
            writer.add_histogram(f'Parameters/{name}', param, t)
            # param.grad is None when gradients have been cleared (zero_grad
            # defaults to set_to_none=True); no gradient histogram is written then.
            if param.grad is not None:
                writer.add_histogram(f'Gradients/{name}', param.grad, t)
finally:
    # Close the writer even when training raises or is interrupted, so the
    # events logged so far are flushed to disk.
    writer.close()

print("Done!")
print(f"\nPour visualiser TensorBoard, executez: .venv\\Scripts\\tensorboard --logdir=runs")

Epoch 1
-------------------------------
loss: 2.467238  [   64/60000]
loss: 1.161456  [ 6464/60000]
loss: 0.881448  [12864/60000]
loss: 0.726967  [19264/60000]
loss: 0.748845  [25664/60000]
loss: 0.653951  [32064/60000]
loss: 0.556463  [38464/60000]
loss: 0.615584  [44864/60000]
loss: 0.615553  [51264/60000]
loss: 0.575344  [57664/60000]
Train Accuracy: 84.3%
Test Error: 
 Accuracy: 94.2%, Avg loss: 0.448914 

Epoch 2
-------------------------------
loss: 0.540701  [   64/60000]
loss: 0.529139  [ 6464/60000]
loss: 0.442498  [12864/60000]
loss: 0.439419  [19264/60000]
loss: 0.399032  [25664/60000]
loss: 0.359559  [32064/60000]
loss: 0.401901  [38464/60000]
loss: 0.276508  [44864/60000]
loss: 0.275279  [51264/60000]
loss: 0.307015  [57664/60000]
Train Accuracy: 93.4%
Test Error: 
 Accuracy: 95.9%, Avg loss: 0.276948 

Epoch 3
-------------------------------
loss: 0.350773  [   64/60000]
loss: 0.406567  [ 6464/60000]
loss: 0.279225  [12864/60000]
loss: 0.260993  [19264/60000]
loss: 0.3010

In [52]:
# Export the trained network to ONNX from the CPU with one example 1x1x28x28 input.
example_inputs = (torch.randn(1,1,28,28),)
model.to("cpu")
# Export in inference mode explicitly rather than relying on an earlier test()
# call: in training mode Dropout stays active and the batch-norm layers use
# batch statistics, which would be captured in the exported graph.
model.eval()
onnx_program = torch.onnx.export(model, example_inputs, dynamo=True)
onnx_program.save("model_rendu.onnx")

[torch.onnx] Obtain model graph for `NeuralNetwork([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `NeuralNetwork([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 3 of general pattern rewrite rules.
