In [49]:
import torch
import numpy as np

In [50]:
w = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([1.0], requires_grad=True)

# Input
x = torch.tensor([3.0])

In [51]:
# Forward pass
y_pred = w * x + b
print("y_pred:", {y_pred.item()})

y_pred: {7.0}


In [52]:
# Loss
y_true = torch.tensor([10.0])
loss = (y_pred - y_true) ** 2
print("Loss:", {loss.item()})

Loss: {9.0}


In [53]:
print("Avant backward: w.grad =", w.grad)

Avant backward: w.grad = None


In [54]:
loss.backward()

In [55]:
print("Apr√®s backward: w.grad =", w.grad)
print("Apres backward: b.grad =", b.grad)

Apr√®s backward: w.grad = tensor([-18.])
Apres backward: b.grad = tensor([-6.])


In [56]:
learning_rate = 0.01
with torch.no_grad(): # D√©sactiver le tracking des gradients pour l'update w = w + alpha * w
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

In [57]:
print("Nouveau poids : w =", w, "b =", b)

Nouveau poids : w = tensor([2.1800], requires_grad=True) b = tensor([1.0600], requires_grad=True)


In [58]:
# R√©initialiser les gradients
w.grad.zero_()
b.grad.zero_()
print("Apr√®s zero_(): w.grad =", w.grad, "b.grad =", b.grad)

Apr√®s zero_(): w.grad = tensor([0.]) b.grad = tensor([0.])


In [59]:
import torch.nn as nn

perceptron = nn.Linear(1, 1)

# Voir les poids initiaux
print("Poids :", perceptron.weight)
print("Biais :", perceptron.bias)

Poids : Parameter containing:
tensor([[0.1339]], requires_grad=True)
Biais : Parameter containing:
tensor([-0.4523], requires_grad=True)


In [60]:
# Forward
x = torch.tensor([3.0]) # Shape (batch_size, features)
print(f"input : {x}")
y_pred = perceptron(x)
print(y_pred)


input : tensor([3.])
tensor([-0.0506], grad_fn=<ViewBackward0>)


In [61]:
# Loss
y_true = torch.tensor([10.0])
criterion = nn.MSELoss()
loss = criterion(y_pred, y_true)

In [62]:
loss

tensor(101.0154, grad_fn=<MseLossBackward0>)

In [63]:
# Backward
loss.backward()

In [64]:
print(f"Gradient de poids : {perceptron.weight.grad}")
print(f"Gradient de biais : {perceptron.bias.grad}")

Gradient de poids : tensor([[-60.3038]])
Gradient de biais : tensor([-20.1013])


In [65]:
import torch.optim as optim
optimizer = optim.SGD(perceptron.parameters(), lr=0.01)

In [66]:
optimizer.step()

In [67]:
print("Nouveau poids :", perceptron.weight)
print("Nouveau biais :", perceptron.bias)

Nouveau poids : Parameter containing:
tensor([[0.7369]], requires_grad=True)
Nouveau biais : Parameter containing:
tensor([-0.2512], requires_grad=True)


In [68]:
optimizer.zero_grad()

In [69]:
print(f"Poids apr√®s zero_grad : {perceptron.weight.grad}")
print(f"Biais apr√®s zero_grad : {perceptron.bias.grad}")

Poids apr√®s zero_grad : None
Biais apr√®s zero_grad : None


In [70]:
class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 4)
        self.fc2 = nn.Linear(4, 3)
        self.fc3 = nn.Linear(3, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

In [71]:
model = SimpleNet()

In [72]:
model

SimpleNet(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)

In [73]:
x = torch.tensor([1.0, 2.0])
y = torch.tensor([5.0])
y_pred = model(x)
loss = nn.MSELoss()(y_pred, y)

loss.backward()

In [74]:
import torch
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models
from torchvision.transforms import ToTensor, Compose, Normalize, RandomHorizontalFlip, RandomCrop, ColorJitter, Resize
import onnxruntime as rt
import matplotlib.pyplot as plt
import torch.optim as optim

In [75]:
writer = SummaryWriter()

In [76]:
def get_mean_std(dataset):
    loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=0)
    mean = 0
    std = 0
    num_pixels = 0
    for images, _ in loader:
        batch_size, num_channels, height, width = images.shape
        num_pixels += batch_size * height * width
        mean += images.mean(axis=(0, 2, 3)).sum()
        std += images.std(axis=(0, 2, 3)).sum()

    mean /= num_pixels
    std /= num_pixels

    return mean, std    

In [77]:
# temp_data = datasets.MNIST(
#     root='data',
#     train=True,
#     download=True,
#     transform=ToTensor()
# )

# mean, std = get_mean_std(temp_data)
# print(f"Mean: {mean}, Std: {std}")

transform_train = Compose([
    RandomHorizontalFlip(),                                       
    RandomCrop(32, padding=4),                                   
    ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),  
    ToTensor(),
    Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)) 
])

transform_test = Compose([
    
    ToTensor(),
    Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    # ToTensor(),
    # Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    # Normalize((0.1307,), (0.3081,))
    # Normalize(mean=(mean,), std=(std,))
])

training_data = datasets.CIFAR10(
    root='data',
    train=True,
    download=True,
    transform=transform_train
)

test_data = datasets.CIFAR10(
    root='data',
    train=False,
    download=True,
    transform=transform_test
)



In [78]:
training_dataloader = DataLoader(training_data, batch_size=64, shuffle=True, pin_memory=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False, pin_memory=True, num_workers=4, drop_last=False)

In [79]:
print(training_data.classes)
print(training_data.data.size)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
153600000


In [80]:
def create_transfer_learning_model(num_classes=10, model_name='resnet18', fine_tune_layers=2):
    """Cr√©e un mod√®le pr√©-entra√Æn√© pour Transfer Learning"""
    if model_name == 'resnet18':
        model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        
        for param in model.parameters():
            param.requires_grad = False
        
        layers_to_unfreeze = []
        if fine_tune_layers >= 1:
            layers_to_unfreeze.append(model.layer4)
        if fine_tune_layers >= 2:
            layers_to_unfreeze.append(model.layer3)
        if fine_tune_layers >= 3:
            layers_to_unfreeze.append(model.layer2)
        if fine_tune_layers >= 4:
            layers_to_unfreeze.append(model.layer1)
        
        for layer in layers_to_unfreeze:
            for param in layer.parameters():
                param.requires_grad = True
        
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
        
    elif model_name == 'alexnet':
        model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
        
        for param in model.parameters():
            param.requires_grad = False
        
        if fine_tune_layers >= 1:
            for param in model.classifier.parameters():
                param.requires_grad = True
        
        if fine_tune_layers >= 2:
            for param in model.features[-6:].parameters():
                param.requires_grad = True
        
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, num_classes)
    
    else:
        raise ValueError(f"Mod√®le '{model_name}' non support√©. Utilisez 'resnet18' ou 'alexnet'")
    
    return model


In [81]:
MODEL_NAME = 'alexnet'  
FINE_TUNE_LAYERS = 2    

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
model = create_transfer_learning_model(
    num_classes=10,
    model_name=MODEL_NAME,
    fine_tune_layers=FINE_TUNE_LAYERS
)
model = model.to(device)

In [82]:
loss_fn = nn.CrossEntropyLoss()

params_to_update = [p for p in model.parameters() if p.requires_grad]

if MODEL_NAME == 'alexnet':
    lr = 0.0005
else:
    lr = 0.001

optimizer = optim.SGD(params_to_update, lr=lr, momentum=0.9)

print(f"üéØ Optimiseur configur√© :")
print(f"   Learning rate : {lr}")
print(f"   Momentum      : 0.9")
print(f"   Param√®tres √† entra√Æner : {sum(p.numel() for p in params_to_update):,}\n")


üéØ Optimiseur configur√© :
   Learning rate : 0.0005
   Momentum      : 0.9
   Param√®tres √† entra√Æner : 56,050,186



In [83]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1)
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(64*4*4, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = F.relu(x)
        x = self.pool(x)
        
        x = self.conv2(x)
        x = self.batchnorm2(x)
        x = F.relu(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.batchnorm3(x)
        x = F.relu(x)
        x = self.pool(x)
        
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

In [84]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
model = CNN().to(device)
print(model)

CNN(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)


In [85]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [86]:
x = torch.rand(1, 3, 32, 32, device=device)
logits = model(x)

In [87]:
logits

tensor([[ 0.1042,  0.1485,  0.1145, -0.0178,  0.1803, -0.2377,  0.0024,  0.0342,
         -0.0014, -0.1715]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [88]:
pred_probab = nn.Softmax(dim=1)(logits)
pred_probab

tensor([[0.1084, 0.1133, 0.1095, 0.0960, 0.1170, 0.0770, 0.0979, 0.1011, 0.0975,
         0.0823]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [89]:
y_pred = pred_probab.argmax(dim=1)
y_pred

tensor([4], device='cuda:0')

In [90]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch_idx, batch_value in enumerate(dataloader):
        X, y = batch_value
        X, y = X.to(device), y.to(device)

        # Forward pass
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch_idx % 100 == 0:
            loss, current = loss.item(), (batch_idx+1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5}]")


In [91]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
            

In [92]:
epochs = 10
for t in range(epochs):
    running_loss = 0.0
    model.train()
    print(f"Epoch {t+1}\n-------------------------------")
    for i, data in enumerate(training_dataloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        
        writer.add_scalar('Loss/train', loss.item(), t * len(training_dataloader) + i)

        running_loss += loss.item()
        if i % 100 == 99:
            print(f'[Epoch {t + 1}, Batch {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0
        
    test(test_dataloader, model, loss_fn)
print("Done!")
writer.flush()

Epoch 1
-------------------------------
[Epoch 1, Batch   100] loss: 2.277
[Epoch 1, Batch   200] loss: 2.159
[Epoch 1, Batch   300] loss: 1.994
[Epoch 1, Batch   400] loss: 1.866
[Epoch 1, Batch   500] loss: 1.786
[Epoch 1, Batch   600] loss: 1.711
[Epoch 1, Batch   700] loss: 1.682
Test Error: 
 Accuracy: 46.0%, Avg loss: 1.452792 

Epoch 2
-------------------------------
[Epoch 2, Batch   100] loss: 1.585
[Epoch 2, Batch   200] loss: 1.549
[Epoch 2, Batch   300] loss: 1.520
[Epoch 2, Batch   400] loss: 1.508
[Epoch 2, Batch   500] loss: 1.476
[Epoch 2, Batch   600] loss: 1.430
[Epoch 2, Batch   700] loss: 1.401
Test Error: 
 Accuracy: 56.7%, Avg loss: 1.188616 

Epoch 3
-------------------------------
[Epoch 3, Batch   100] loss: 1.366
[Epoch 3, Batch   200] loss: 1.330
[Epoch 3, Batch   300] loss: 1.308
[Epoch 3, Batch   400] loss: 1.283
[Epoch 3, Batch   500] loss: 1.288
[Epoch 3, Batch   600] loss: 1.254
[Epoch 3, Batch   700] loss: 1.245
Test Error: 
 Accuracy: 60.6%, Avg loss: 

In [93]:
example_input = (torch.randn(1, 3, 32, 32),)
model.to("cpu")
model.eval()
onnx_program = torch.onnx.export(model, example_input, input_names=["input"], output_names=["output"], dynamo=True)

[torch.onnx] Obtain model graph for `CNN([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `CNN([...]` with `torch.export.export(..., strict=False)`... ‚úÖ
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ‚úÖ
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ‚úÖ
Applied 3 of general pattern rewrite rules.


In [94]:
onnx_program.save("model.onnx")
sess = rt.InferenceSession("model.onnx", providers= rt.get_available_providers())
input_name = sess.get_inputs()[0].name
pred_onnx = sess.run(None, {input_name: example_input[0].numpy()})
print(pred_onnx)

[array([[ 0.42486   , -1.645762  ,  2.1923857 ,  0.05976705,  1.6585653 ,
        -0.3097263 ,  1.3450729 ,  1.77758   , -2.3318365 , -0.25082293]],
      dtype=float32)]
