# Veille technologique - Modèle CNN ResNet50 vs Vision Transformer (ViT)

## Imports et config

In [75]:
# %pip install timm

In [76]:
import os
import timm
import torch
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from PIL import Image
from pathlib import Path
from torch import nn, optim
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

# Prefer the GPU when CUDA is usable, otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# Seed every random number generator the notebook relies on
# (Python's `random`, NumPy, PyTorch CPU, and all CUDA devices when present).
SEED = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

Device: cpu


## Chargement du dataset

In [77]:
# Locations of the raw CSV and of the product images, relative to the notebook.
SOURCES_PATH = "../Sources/"
CSV_FILE = "flipkart_com-ecommerce_sample_1050.csv"
CSV_FILEPATH = f"{SOURCES_PATH}{CSV_FILE}"
IMG_ROOT = Path(SOURCES_PATH) / "Images"

df = pd.read_csv(CSV_FILEPATH)

# Quick sanity check: frame dimensions and in-memory footprint in MB.
memory_mb = df.memory_usage().sum() / 1024**2
print('DataSet Shape = {}'.format(df.shape))
print('DataSet Memory Usage = {:.2f} MB'.format(memory_mb))

DataSet Shape = (1050, 15)
DataSet Memory Usage = 0.11 MB


#### Je crée ma colonne avec mes 7 catégories

In [78]:
# Keep only the root of the category tree. The parsing expects values shaped
# like '["Root >> Child >> ..."]'. Stripping the wrapper characters, instead of
# indexing the result of split('["'), also handles a tree with a single level
# (which would otherwise keep a trailing '"]') and no longer raises IndexError
# when the '["' prefix is missing.
df['category_simple'] = df['product_category_tree'].apply(
    lambda tree: tree.split(' >> ')[0].strip('[]"')
)

#### On conserve uniquement les lignes dont l'image existe sur le disque (mesure de sécurité)

In [79]:
def img_exists(row):
    """Return True when the file named by ``row["image"]`` exists under ``IMG_ROOT``."""
    candidate = IMG_ROOT / str(row["image"])
    return candidate.exists()

In [80]:
# Resolve each image file name to its full path under IMG_ROOT.
df['img_path'] = df['image'].apply(lambda x: IMG_ROOT / str(x))

# Apply the safety filter this section is about: drop the rows whose image
# file is missing on disk so the Dataset never tries to open a missing file.
n_rows_before = len(df)
df = df[df.apply(img_exists, axis=1)].reset_index(drop=True)
if len(df) < n_rows_before:
    print("Lignes ignorées (image manquante):", n_rows_before - len(df))
print("Nombre total de lignes/images:", len(df))

Nombre total de lignes/images: 1050


#### Encodage des labels

In [81]:
# Sorted category names define the integer label space (position == label id).
classes = sorted(df['category_simple'].unique())
# Forward (name -> index) and reverse (index -> name) lookup tables.
class_to_idx = dict(zip(classes, range(len(classes))))
idx_to_class = dict(enumerate(classes))

In [82]:
# Number of target classes, reused when sizing the classification heads.
num_classes = len(classes)
# Integer target column derived from the category name.
df['label'] = df['category_simple'].map(class_to_idx)
print("Classes:", classes)

Classes: ['Baby Care', 'Beauty and Personal Care', 'Computers', 'Home Decor & Festive Needs', 'Home Furnishing', 'Kitchen & Dining', 'Watches']


## Split train / val / test

#### Stratification sur la catégorie

In [83]:
# First cut: 70 % of the rows for training, 30 % held out for the
# validation/test split. Stratifying on the label preserves class proportions.
train_df, temp_df = train_test_split(
    df, test_size=0.30, random_state=SEED, stratify=df["label"]
)

In [84]:
# Second cut: the held-out part is halved into validation and test sets,
# again stratified on the label.
val_df, test_df = train_test_split(
    temp_df, test_size=0.50, random_state=SEED, stratify=temp_df["label"]
)

In [85]:
# Row counts of the train / validation / test splits, in that order.
tuple(len(part) for part in (train_df, val_df, test_df))

(735, 157, 158)

## Dataset PyTorch + data augmentation

In [86]:
# Side length, in pixels, of the square images produced by the train and eval transforms.
IMG_SIZE = 224

In [87]:
# ImageNet channel means / standard deviations used to normalise the tensors.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: light random augmentation (flip, small rotation, random
# crop covering 80-100 % of the image) followed by tensor conversion and
# normalisation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline: no random transform, so results are repeatable.
# NOTE(review): Resize((IMG_SIZE, IMG_SIZE)) already yields an exactly square
# IMG_SIZE image, so the following CenterCrop leaves it unchanged.
eval_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

#### Classe Dataset pour les images

In [88]:
class FlipkartImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide an ``img_path`` column (a value ``Image.open`` accepts)
        and a ``label`` column whose values can be converted with ``int()``.
    transform : callable, optional
        Applied to the RGB image before it is returned. When ``None`` the
        converted image is returned as is.
    """

    def __init__(self, dataframe, transform=None):
        # Work on a copy with a fresh 0..n-1 index so the caller's frame
        # (typically a shuffled split) is left untouched.
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. number of rows in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, int_label)``."""
        row = self.df.iloc[idx]
        # Image.open keeps the underlying file open until the image is closed.
        # The context manager releases the handle as soon as convert() has
        # produced its own in-memory RGB copy, instead of leaking one open
        # file per sample until garbage collection.
        with Image.open(row['img_path']) as raw_image:
            img = raw_image.convert("RGB")
        # Explicit None check: a valid transform object that happens to be
        # falsy (e.g. an empty container) must still be applied.
        if self.transform is not None:
            img = self.transform(img)
        return img, int(row['label'])

In [89]:
BATCH_SIZE = 16

# Random augmentation is applied to the training split only; validation and
# test share the evaluation pipeline.
train_dataset = FlipkartImageDataset(train_df, transform=train_transform)
val_dataset, test_dataset = (
    FlipkartImageDataset(split_df, transform=eval_transform)
    for split_df in (val_df, test_df)
)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader, test_loader = (
    DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    for eval_dataset in (val_dataset, test_dataset)
)

## Fonctions utilitaires pour l'entraînement et l'évaluation

In [90]:
def train_one_epoch(model, loader, criterion, optimizer, log_every=20):
    """Run one optimisation pass over ``loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train; it is switched to ``train()`` mode.
    loader : iterable
        Yields ``(images, labels)`` tensor batches.
    criterion : callable
        Loss called as ``criterion(outputs, labels)``; the per-batch value is
        weighted by the batch size when averaged over the epoch.
    optimizer : torch.optim.Optimizer
        Optimizer stepped once per batch.
    log_every : int, optional
        Print running metrics every ``log_every`` batches. ``0`` or ``None``
        disables logging.

    Returns
    -------
    tuple[float, float]
        ``(mean_loss, accuracy)`` computed over the samples seen in the epoch.

    Raises
    ------
    ValueError
        If ``loader`` yields no sample.
    """
    model.train()

    # Send every batch to the device the model lives on. Without this the
    # forward pass fails as soon as the model sits on the GPU while the
    # DataLoader yields CPU tensors.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    correct = 0
    total = 0

    for i, (images, labels) in enumerate(loader):
        images = images.to(target_device)
        labels = labels.to(target_device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += batch_size

        # `log_every` is checked first so that 0 / None means "silent"
        # instead of raising on a modulo by zero.
        if log_every and i % log_every == 0:
            print(f"batch {i}/{len(loader)} | loss={loss.item():.3f} | acc={(correct/total):.3f}")

    if total == 0:
        raise ValueError("train_one_epoch: the loader did not yield any sample")

    # Average over the samples actually processed: identical to
    # len(loader.dataset) for a regular DataLoader, and still correct with
    # drop_last=True or a loader without a `.dataset` attribute.
    return running_loss / total, correct / total

In [91]:
@torch.no_grad()
def evaluate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate; it is switched to ``eval()`` mode.
    loader : iterable
        Yields ``(images, labels)`` tensor batches.
    criterion : callable
        Loss called as ``criterion(outputs, labels)``; the per-batch value is
        weighted by the batch size when averaged.

    Returns
    -------
    tuple
        ``(mean_loss, accuracy, labels, predictions)`` where the last two are
        NumPy arrays in the order the samples were yielded.

    Raises
    ------
    ValueError
        If ``loader`` yields no sample.
    """
    model.eval()

    # Use the device the model actually lives on rather than a notebook-level
    # global, so the function works for any model regardless of cell state.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    all_preds, all_labels = [], []

    for images, labels in loader:
        images, labels = images.to(target_device), labels.to(target_device)
        outputs = model(images)
        loss = criterion(outputs, labels)

        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.detach().cpu().numpy())
        all_labels.extend(labels.detach().cpu().numpy())

    n_samples = len(all_labels)
    if n_samples == 0:
        raise ValueError("evaluate: the loader did not yield any sample")

    # Average over the samples actually seen: same value as
    # len(loader.dataset) for a regular DataLoader, and it also works for
    # loaders that have no `.dataset` attribute.
    epoch_loss = running_loss / n_samples
    epoch_acc = accuracy_score(all_labels, all_preds)
    return epoch_loss, epoch_acc, np.array(all_labels), np.array(all_preds)

#### Visualisations

In [92]:
def plot_history(history, title=""):
    """Plot train/validation loss and accuracy curves side by side.

    ``history`` is a dict holding the per-epoch lists ``train_loss``,
    ``val_loss``, ``train_acc`` and ``val_acc``; ``title`` becomes the figure
    super-title.
    """
    n_epochs = len(history["train_loss"])
    epochs = range(1, n_epochs + 1)
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))

    # One panel per metric: (panel title, train key, validation key).
    panels = (
        ("Loss", "train_loss", "val_loss"),
        ("Accuracy", "train_acc", "val_acc"),
    )
    for ax, (panel_title, train_key, val_key) in zip(axes, panels):
        ax.plot(epochs, history[train_key], label="Train")
        ax.plot(epochs, history[val_key], label="Val")
        ax.set_title(panel_title)
        ax.set_xlabel("Epoch")
        ax.legend()

    plt.suptitle(title)
    plt.tight_layout()
    plt.show()

## Modèle CNN de référence (baseline) : ResNet50

In [93]:
# ResNet-50 initialised with torchvision's IMAGENET1K_V2 pretrained weights.
resnet_weights = models.ResNet50_Weights.IMAGENET1K_V2
resnet = models.resnet50(weights=resnet_weights)

#### On gèle le backbone, puis on remplace la dernière couche FC par notre tête de classification

In [94]:
# Freeze every pretrained weight so that no gradient is computed for them.
# NOTE(review): this does not stop BatchNorm running statistics from being
# updated while the model is in train() mode — confirm this is intended.
for param in resnet.parameters():
    param.requires_grad_(False)

In [95]:
# Swap the final fully connected layer for a fresh linear head with one output
# per category. A newly created layer is trainable by default.
in_features = resnet.fc.in_features
resnet.fc = nn.Linear(in_features=in_features, out_features=num_classes)

In [96]:
# Move the model's parameters and buffers to the selected device.
resnet = resnet.to(device)

In [97]:
# Cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()
# Every parameter is passed to Adam; parameters that never receive a gradient
# are skipped by the optimizer step.
optimizer = optim.Adam(
    params=resnet.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)

In [98]:
# Training budget and bookkeeping for the ResNet run.
EPOCHS = 15
best_val_acc = 0.0
# One list per tracked metric, filled with one value per epoch.
history_resnet = {
    metric: [] for metric in ("train_loss", "val_loss", "train_acc", "val_acc")
}

In [99]:
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(resnet, train_loader, criterion, optimizer)
    # Only the loss and accuracy are needed here; labels/predictions are dropped.
    val_loss, val_acc = evaluate(resnet, val_loader, criterion)[:2]

    # Record this epoch's metrics for the learning curves.
    epoch_metrics = {
        "train_loss": train_loss,
        "val_loss": val_loss,
        "train_acc": train_acc,
        "val_acc": val_acc,
    }
    for metric_name, metric_value in epoch_metrics.items():
        history_resnet[metric_name].append(metric_value)

    print(f"[ResNet] Epoch {epoch:02d} | "
          f"Train loss={train_loss:.3f}, acc={train_acc:.3f} | "
          f"Val loss={val_loss:.3f}, acc={val_acc:.3f}")

    # Best-model checkpointing (training is not stopped early): the weights
    # are saved whenever the validation accuracy strictly improves.
    improved = val_acc > best_val_acc
    if improved:
        best_val_acc = val_acc
        torch.save(resnet.state_dict(), "resnet50_best.pth")

batch 0/46 | loss=1.957 | acc=0.188
batch 20/46 | loss=1.877 | acc=0.208




batch 40/46 | loss=1.784 | acc=0.282
[ResNet] Epoch 01 | Train loss=1.873, acc=0.302 | Val loss=1.784, acc=0.452
batch 0/46 | loss=1.707 | acc=0.688
batch 20/46 | loss=1.735 | acc=0.580




batch 40/46 | loss=1.589 | acc=0.607
[ResNet] Epoch 02 | Train loss=1.704, acc=0.614 | Val loss=1.643, acc=0.650
batch 0/46 | loss=1.669 | acc=0.625




batch 20/46 | loss=1.622 | acc=0.738
batch 40/46 | loss=1.506 | acc=0.748
[ResNet] Epoch 03 | Train loss=1.552, acc=0.739 | Val loss=1.509, acc=0.720
batch 0/46 | loss=1.481 | acc=0.688
batch 20/46 | loss=1.348 | acc=0.735




batch 40/46 | loss=1.545 | acc=0.758
[ResNet] Epoch 04 | Train loss=1.431, acc=0.762 | Val loss=1.393, acc=0.764
batch 0/46 | loss=1.426 | acc=0.625
batch 20/46 | loss=1.289 | acc=0.750




batch 40/46 | loss=1.221 | acc=0.796
[ResNet] Epoch 05 | Train loss=1.320, acc=0.788 | Val loss=1.310, acc=0.777
batch 0/46 | loss=1.110 | acc=0.938




batch 20/46 | loss=1.276 | acc=0.807
batch 40/46 | loss=1.176 | acc=0.799
[ResNet] Epoch 06 | Train loss=1.224, acc=0.795 | Val loss=1.236, acc=0.771
batch 0/46 | loss=1.135 | acc=0.750
batch 20/46 | loss=1.219 | acc=0.818




batch 40/46 | loss=0.894 | acc=0.814
[ResNet] Epoch 07 | Train loss=1.136, acc=0.812 | Val loss=1.147, acc=0.783
batch 0/46 | loss=1.178 | acc=0.750




batch 20/46 | loss=1.107 | acc=0.830
batch 40/46 | loss=1.008 | acc=0.808
[ResNet] Epoch 08 | Train loss=1.077, acc=0.814 | Val loss=1.099, acc=0.764
batch 0/46 | loss=0.986 | acc=0.812
batch 20/46 | loss=0.925 | acc=0.810




batch 40/46 | loss=0.871 | acc=0.811
[ResNet] Epoch 09 | Train loss=1.004, acc=0.822 | Val loss=1.042, acc=0.783
batch 0/46 | loss=0.971 | acc=0.812
batch 20/46 | loss=0.954 | acc=0.804
batch 40/46 | loss=1.055 | acc=0.812




[ResNet] Epoch 10 | Train loss=0.961, acc=0.819 | Val loss=1.000, acc=0.796
batch 0/46 | loss=0.974 | acc=0.875
batch 20/46 | loss=0.835 | acc=0.848
batch 40/46 | loss=0.827 | acc=0.826




[ResNet] Epoch 11 | Train loss=0.908, acc=0.826 | Val loss=0.961, acc=0.796
batch 0/46 | loss=1.046 | acc=0.562




batch 20/46 | loss=0.982 | acc=0.810
batch 40/46 | loss=0.741 | acc=0.828
[ResNet] Epoch 12 | Train loss=0.876, acc=0.824 | Val loss=0.927, acc=0.771
batch 0/46 | loss=1.119 | acc=0.625
batch 20/46 | loss=0.747 | acc=0.792




batch 40/46 | loss=0.849 | acc=0.817
[ResNet] Epoch 13 | Train loss=0.850, acc=0.811 | Val loss=0.894, acc=0.796
batch 0/46 | loss=0.766 | acc=0.875
batch 20/46 | loss=0.729 | acc=0.821




batch 40/46 | loss=0.797 | acc=0.822
[ResNet] Epoch 14 | Train loss=0.811, acc=0.827 | Val loss=0.868, acc=0.790
batch 0/46 | loss=0.855 | acc=0.812




batch 20/46 | loss=0.819 | acc=0.830
batch 40/46 | loss=0.876 | acc=0.829
[ResNet] Epoch 15 | Train loss=0.761, acc=0.835 | Val loss=0.846, acc=0.790


#### À lancer : courbes d'apprentissage (loss / accuracy) du ResNet50

In [None]:
# Train vs. validation loss and accuracy curves recorded during the ResNet50 run.
plot_history(history_resnet, title="ResNet50")

## Vision Transformer (ViT-B/16)

In [None]:
# ViT-B/16 created through timm with pretrained weights; `num_classes` asks
# timm for a classification head sized for our categories.
# NOTE(review): the data pipeline normalises with ImageNet mean/std — confirm
# that this matches the statistics expected by this timm checkpoint.
vit = timm.create_model(
    "vit_base_patch16_224", pretrained=True, num_classes=num_classes
).to(device)

# Unlike the ResNet baseline, every ViT parameter is passed to the optimizer
# and none is frozen here.
criterion_vit = nn.CrossEntropyLoss()
optimizer_vit = optim.AdamW(params=vit.parameters(), lr=2e-5, weight_decay=1e-4)

# Training budget and bookkeeping for the ViT run.
EPOCHS_VIT = 15
best_val_acc_vit = 0.0
history_vit = {
    metric: [] for metric in ("train_loss", "val_loss", "train_acc", "val_acc")
}

for epoch in range(1, EPOCHS_VIT + 1):
    train_loss, train_acc = train_one_epoch(vit, train_loader, criterion_vit, optimizer_vit)
    # Only the loss and accuracy are needed here; labels/predictions are dropped.
    val_loss, val_acc = evaluate(vit, val_loader, criterion_vit)[:2]

    # Record this epoch's metrics for the learning curves.
    epoch_metrics = {
        "train_loss": train_loss,
        "val_loss": val_loss,
        "train_acc": train_acc,
        "val_acc": val_acc,
    }
    for metric_name, metric_value in epoch_metrics.items():
        history_vit[metric_name].append(metric_value)

    print(f"[ViT] Epoch {epoch:02d} | "
          f"Train loss={train_loss:.3f}, acc={train_acc:.3f} | "
          f"Val loss={val_loss:.3f}, acc={val_acc:.3f}")

    # Best-model checkpointing: save the weights whenever the validation
    # accuracy strictly improves.
    improved = val_acc > best_val_acc_vit
    if improved:
        best_val_acc_vit = val_acc
        torch.save(vit.state_dict(), "vit_base_patch16_224_best.pth")

In [None]:
# Train vs. validation loss and accuracy curves recorded during the ViT-B/16 run.
plot_history(history_vit, title="ViT-B/16")

## Évaluation finale sur le test set

In [None]:
# Restore, for each model, the checkpoint saved at its best validation accuracy.
# NOTE(review): torch.load unpickles the file — only load checkpoints produced
# by this notebook or another trusted source.
best_checkpoints = (
    (resnet, "resnet50_best.pth"),
    (vit, "vit_base_patch16_224_best.pth"),
)
for trained_model, checkpoint_path in best_checkpoints:
    trained_model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Explicit label ids keep the report rows aligned with `classes` even when a
# class is missing from both the test labels and the predictions. Without
# `labels`, `target_names` would then no longer match the classes found.
label_ids = list(range(len(classes)))

# ResNet
test_loss_r, test_acc_r, y_true_r, y_pred_r = evaluate(resnet, test_loader, criterion)
print(f"ResNet50 - Test loss={test_loss_r:.3f}, acc={test_acc_r:.3f}")
print(classification_report(y_true_r, y_pred_r, labels=label_ids, target_names=classes))

# ViT
test_loss_v, test_acc_v, y_true_v, y_pred_v = evaluate(vit, test_loader, criterion_vit)
print(f"ViT-B/16 - Test loss={test_loss_v:.3f}, acc={test_acc_v:.3f}")
print(classification_report(y_true_v, y_pred_v, labels=label_ids, target_names=classes))

# Confusion matrices
def plot_confusion(y_true, y_pred, title):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Rows are the true classes and columns the predicted ones, both labelled
    with the notebook-level ``classes`` list. ``title`` is the figure title.
    """
    # Pin the label set to every known class id. Otherwise a class absent from
    # both arrays would shrink the matrix and shift the tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=classes, yticklabels=classes)
    plt.xlabel("Prédit")
    plt.ylabel("Vrai")
    plt.title(title)
    plt.tight_layout()
    plt.show()

# One confusion matrix per model, ResNet50 first, then ViT-B/16.
confusion_inputs = [
    (y_true_r, y_pred_r, "Matrice de confusion - ResNet50"),
    (y_true_v, y_pred_v, "Matrice de confusion - ViT-B/16"),
]
for true_labels, predicted_labels, figure_title in confusion_inputs:
    plot_confusion(true_labels, predicted_labels, figure_title)