# Installation des dépendances


In [1]:
pip install torch torchvision torchaudio pandas pillow tqdm scikit-learn

Note: you may need to restart the kernel to use updated packages.


# Chargement du dataset et des transformations


In [1]:
import os
import pandas as pd
from PIL import Image, ImageFile
from torch.utils.data import Dataset
from torchvision import transforms

# Empêcher des crashs sur images corrompues
ImageFile.LOAD_TRUNCATED_IMAGES = True

class BirdDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        assert 'path' in self.df.columns and 'class_idx' in self.df.columns, "Le CSV doit contenir 'path' et 'class_idx'"

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = os.path.join(self.img_dir, row['path'])
        image = Image.open(img_name).convert("RGB")
        label = int(row['class_idx'])
        
        if self.transform:
            image = self.transform(image)
        return image, label

# Transformations (Data Augmentation)
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.02),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.25, scale=(0.02, 0.15), ratio=(0.3, 3.3), value="random")
])

val_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Configuration des chemins et hyperparamètres


In [2]:
import torch
import os

torch.backends.mkldnn.enabled = False  # Évite des crashs CPU sous Windows

class Config:
    TRAIN_CSV = "train_metadata.csv"
    SUB_CSV = "sample_submission.csv"
    TRAIN_DIR = "train_images"
    TEST_DIR = os.path.join("test_images", "mistery_cat")

    NUM_CLASSES = 20
    BATCH_SIZE = 8
    LR = 7e-4  # OneCycleLR max_lr
    EPOCHS = 12  # plus d'epochs

    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Limiter les threads pour réduire la contention CPU en notebooks
try:
    torch.set_num_threads(max(1, os.cpu_count() // 2))
except Exception:
    pass

In [None]:
import torch.nn as nn
from torchvision import models

def get_bird_model(num_classes):
    # Modèle plus puissant: ConvNeXt-Tiny pré-entraîné
    model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.DEFAULT)

    # Adapter la dernière couche
    in_features = model.classifier[2].in_features
    model.classifier[2] = nn.Sequential(
        nn.Dropout(p=0.4),
        nn.Linear(in_features, num_classes)
    )
    return model

# Définition du modèle
Utilise ConvNeXt-Tiny pré-entraîné et remplace la dernière couche pour 20 classes.

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np


def mixup_data(x, y, alpha=0.2):
    if alpha <= 0:
        return x, y, 1.0, y
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size(0)
    index = torch.randperm(batch_size, device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, lam, y_b


def main():
    # 1. Préparation des données
    full_ds = BirdDataset(Config.TRAIN_CSV, Config.TRAIN_DIR, transform=train_transforms)
    indices = list(range(len(full_ds)))
    train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42, shuffle=True)

    # Compute class weights for WeightedRandomSampler
    train_labels = [int(full_ds.df.iloc[i]['class_idx']) for i in train_idx]
    class_counts = np.bincount(train_labels, minlength=Config.NUM_CLASSES)
    class_weights = 1.0 / (class_counts + 1e-6)
    sample_weights = [class_weights[label] for label in train_labels]
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

    # Windows/Jupyter: éviter les deadlocks -> num_workers=0
    train_loader = DataLoader(
        Subset(full_ds, train_idx),
        batch_size=Config.BATCH_SIZE,
        shuffle=False,
        sampler=sampler,
        num_workers=0,
        pin_memory=(Config.DEVICE == "cuda")
    )
    # Validation dataset with val_transforms
    val_ds = BirdDataset(Config.TRAIN_CSV, Config.TRAIN_DIR, transform=val_transforms)
    val_loader = DataLoader(
        Subset(val_ds, val_idx),
        batch_size=Config.BATCH_SIZE,
        num_workers=0,
        pin_memory=(Config.DEVICE == "cuda")
    )

    # 2. Setup
    model = get_bird_model(Config.NUM_CLASSES).to(Config.DEVICE)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.05)
    optimizer = torch.optim.AdamW(model.parameters(), lr=Config.LR, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=Config.LR,
        epochs=Config.EPOCHS,
        steps_per_epoch=len(train_loader)
    )

    best_acc = 0

    # Warmup: geler le backbone sur le premier epoch
    def freeze_backbone(m):
        for p in m.parameters():
            p.requires_grad = False
        for p in m.classifier.parameters():
            p.requires_grad = True

    def unfreeze_backbone(m):
        for p in m.parameters():
            p.requires_grad = True

    # 3. Boucle d'entraînement
    for epoch in range(Config.EPOCHS):
        if epoch == 0:
            freeze_backbone(model)
        elif epoch == 1:
            unfreeze_backbone(model)

        model.train()
        running_loss, correct, total = 0.0, 0, 0
        for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{Config.EPOCHS}"):
            imgs, labels = imgs.to(Config.DEVICE), labels.to(Config.DEVICE)

            # MixUp with 50% probability
            if np.random.rand() < 0.5:
                imgs, y_a, lam, y_b = mixup_data(imgs, labels, alpha=0.2)
            else:
                y_a, lam, y_b = labels, 1.0, labels

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = lam * criterion(outputs, y_a) + (1 - lam) * criterion(outputs, y_b)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()

            _, pred = outputs.max(1)
            correct += pred.eq(labels).sum().item()
            total += labels.size(0)
            running_loss += loss.item() * labels.size(0)

        train_acc = 100.0 * correct / total if total else 0.0
        avg_loss = running_loss / total if total else 0.0

        # Validation
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(Config.DEVICE), labels.to(Config.DEVICE)
                out = model(imgs)
                val_pred = out.max(1)[1]
                val_correct += val_pred.eq(labels).sum().item()
                val_total += labels.size(0)

        acc = 100.0 * val_correct / val_total if val_total else 0.0
        print(f"Epoch {epoch+1}: Train Acc {train_acc:.2f}% | Val Acc: {acc:.2f}% | Loss: {avg_loss:.4f}")

        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), "best_bird_model.pth")
            print("Modèle sauvegardé !")

if __name__ == "__main__":
    main()

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to C:\Users\Hp/.cache\torch\hub\checkpoints\convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:39<00:00, 2.89MB/s] 
Epoch 1/12: 100%|██████████| 109/109 [09:43<00:00,  5.36s/it]


Epoch 1: Train Acc 9.36% | Val Acc: 20.74% | Loss: 2.9890
Modèle sauvegardé !


Epoch 2/12: 100%|██████████| 109/109 [32:18<00:00, 17.78s/it]


Epoch 2: Train Acc 40.23% | Val Acc: 71.43% | Loss: 1.9889
Modèle sauvegardé !


Epoch 3/12: 100%|██████████| 109/109 [32:49<00:00, 18.07s/it]


Epoch 3: Train Acc 56.76% | Val Acc: 46.08% | Loss: 1.4193


Epoch 4/12: 100%|██████████| 109/109 [35:12<00:00, 19.38s/it]


Epoch 4: Train Acc 47.75% | Val Acc: 58.06% | Loss: 1.6238


Epoch 5/12: 100%|██████████| 109/109 [34:57<00:00, 19.24s/it]


Epoch 5: Train Acc 52.25% | Val Acc: 61.29% | Loss: 1.5331


Epoch 6/12: 100%|██████████| 109/109 [36:49<00:00, 20.27s/it]


Epoch 6: Train Acc 51.33% | Val Acc: 77.42% | Loss: 1.3308
Modèle sauvegardé !


Epoch 7/12: 100%|██████████| 109/109 [36:05<00:00, 19.87s/it]


Epoch 7: Train Acc 59.88% | Val Acc: 74.65% | Loss: 1.1579


Epoch 8/12: 100%|██████████| 109/109 [39:12<00:00, 21.58s/it]


Epoch 8: Train Acc 64.39% | Val Acc: 74.19% | Loss: 1.0951


Epoch 9/12: 100%|██████████| 109/109 [36:28<00:00, 20.08s/it]


Epoch 9: Train Acc 66.47% | Val Acc: 78.80% | Loss: 0.8951
Modèle sauvegardé !


Epoch 10/12: 100%|██████████| 109/109 [34:32<00:00, 19.01s/it]


Epoch 10: Train Acc 70.40% | Val Acc: 83.87% | Loss: 0.8277
Modèle sauvegardé !


Epoch 11/12: 100%|██████████| 109/109 [33:52<00:00, 18.64s/it]


Epoch 11: Train Acc 71.79% | Val Acc: 84.33% | Loss: 0.6647
Modèle sauvegardé !


Epoch 12/12: 100%|██████████| 109/109 [33:07<00:00, 18.23s/it]


Epoch 12: Train Acc 76.76% | Val Acc: 85.25% | Loss: 0.7137
Modèle sauvegardé !


# Entraînement du modèle

In [None]:
import pandas as pd
import torch
import os
from PIL import Image
from tqdm import tqdm
from torchvision.transforms import functional as TF


# Inference helper to convert PIL crops into normalized tensors
_infer_transform = None

def _ensure_infer_transform():
    global _infer_transform
    if _infer_transform is None:
        from torchvision import transforms
        _infer_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    return _infer_transform


def predict():
    model = get_bird_model(Config.NUM_CLASSES).to(Config.DEVICE)
    state = torch.load("best_bird_model.pth", map_location=Config.DEVICE)
    model.load_state_dict(state)
    model.eval()

    sub_df = pd.read_csv(Config.SUB_CSV)
    preds = []
    infer_tf = _ensure_infer_transform()

    with torch.no_grad():
        for img_name in tqdm(sub_df['path']):
            path = os.path.join(Config.TEST_DIR, img_name)
            img = Image.open(path).convert("RGB")

            # Resize une fois, puis multi-crop et flip horizontal, moyenne des logits
            img_r = TF.resize(img, [256, 256])
            crops = list(TF.five_crop(img_r, 224))  
            flips = [TF.hflip(c) for c in crops]
            all_views = crops + flips

            tensors = [infer_tf(c) for c in all_views]
            batch = torch.stack(tensors).to(Config.DEVICE)

            out = model(batch).mean(0)
            preds.append(out.argmax().item())

    sub_df['class_idx'] = preds
    sub_df.to_csv("submission.csv", index=False)
    print("Fichier submission.csv généré !")

if __name__ == "__main__":
    predict()

  state = torch.load("best_bird_model.pth", map_location=Config.DEVICE)
100%|██████████| 400/400 [39:32<00:00,  5.93s/it]

Fichier submission.csv généré !



