In [1]:
import torch
# Report whether CUDA is usable. The device name is only queried when a GPU is
# actually present, because torch.cuda.get_device_name raises on CUDA-less hosts.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device detected")


True
Quadro M2200


In [2]:
import copy
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from torch.utils.data import DataLoader, random_split
from torchvision import models, transforms, datasets

# Device configuration: run on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# --- NEW STRUCTURE FOR COMPCARS ---
# The CompCars dataset is laid out as follows:
# archive/
# ├── image/
# │   ├── 1/              (make_id = 1)
# │   │   ├── 1101/       (model_id = 1101)
# │   │   │   └── 2011/   (release year)
# │   │   │       └── *.jpg
# │   │   ├── 1102/       (model_id = 1102)
# │   │   └── ...
# │   ├── 2/              (make_id = 2)
# │   └── ...
# └── label/
#     ├── 1/
#     └── ...

# The dataset class below reads this directory structure directly.
class CompCarsDataset(torch.utils.data.Dataset):
    """Image-classification dataset built from a CompCars-style directory tree.

    Each ``<root_dir>/image/<make_id>/<model_id>/`` directory defines one class
    named ``make<make_id>_model<model_id>``. Every image file found anywhere
    below that directory (at any depth) is a sample of that class.

    Directory entries are visited in sorted order so that the class indices
    and the sample order are identical from one run (and one machine) to the
    next; ``os.listdir`` / ``os.walk`` give no ordering guarantee on their own.

    Args:
        root_dir: Dataset root; must contain an ``image`` sub-directory.
        transform: Optional callable applied to each decoded image, which is
            passed in as an ``H x W x 3`` NumPy array.
    """

    # File suffixes (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []   # one file path per sample
        self.labels = []        # class index per sample, parallel to image_paths
        self.class_to_idx = {}  # "make<M>_model<N>" -> class index
        self.idx_to_class = {}  # class index -> "make<M>_model<N>"

        # Walk the directory tree once to index every image.
        self._build_dataset()

    def _build_dataset(self):
        """Scan ``<root_dir>/image`` and fill the path, label and class tables.

        Raises:
            OSError: propagated from ``os.listdir`` when ``<root_dir>/image``
                does not exist or cannot be read.
        """
        print("Construction du dataset CompCars...")
        image_root = os.path.join(self.root_dir, "image")

        for make_id in sorted(os.listdir(image_root)):
            make_path = os.path.join(image_root, make_id)
            if not os.path.isdir(make_path):
                continue

            for model_id in sorted(os.listdir(make_path)):
                model_path = os.path.join(make_path, model_id)
                if not os.path.isdir(model_path):
                    continue

                # A class is registered even if the directory turns out to
                # contain no image, exactly as before.
                class_name = f"make{make_id}_model{model_id}"
                class_idx = self.class_to_idx.setdefault(
                    class_name, len(self.class_to_idx)
                )
                self.idx_to_class[class_idx] = class_name

                for current_dir, sub_dirs, files in os.walk(model_path):
                    # Sorting in place makes os.walk descend deterministically.
                    sub_dirs.sort()
                    for file_name in sorted(files):
                        if file_name.lower().endswith(self.IMAGE_EXTENSIONS):
                            self.image_paths.append(
                                os.path.join(current_dir, file_name)
                            )
                            self.labels.append(class_idx)

        print(f"Dataset construit: {len(self.image_paths)} images, {len(self.class_to_idx)} classes")

    @staticmethod
    def _to_rgb_uint8(image):
        """Return ``image`` as an ``H x W x 3`` array, uint8 when it was float.

        * 2-D (grayscale) input is replicated over three channels.
        * 1- or 2-channel input keeps its first channel, replicated three
          times (the second channel is presumably alpha and is dropped).
        * 4-channel (RGBA) input loses its alpha channel.
        * Floating-point input is treated as being in ``[0, 1]`` and rescaled
          to uint8. ``plt.imread`` produces such arrays for PNG files, and
          NOTE(review): ``ToPILImage`` appears to reject float 3-channel
          ndarrays - confirm against the installed torchvision.
        * Other integer dtypes are returned unchanged.
        """
        image = np.asarray(image)

        if image.ndim == 2:  # Grayscale
            image = np.stack([image] * 3, axis=-1)
        elif image.shape[2] in (1, 2):  # Gray, or gray + alpha
            image = np.repeat(image[:, :, :1], 3, axis=2)
        elif image.shape[2] == 4:  # RGBA
            image = image[:, :, :3]

        if np.issubdtype(image.dtype, np.floating):
            image = np.rint(np.clip(image, 0.0, 1.0) * 255.0).astype(np.uint8)

        return image

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is decoded with ``plt.imread``, normalised by
        ``_to_rgb_uint8`` and then passed through ``self.transform`` when one
        was supplied.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        image = self._to_rgb_uint8(plt.imread(img_path))

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Transformations
# Both pipelines receive a NumPy image, resize it to the same square input and
# normalise it with the same per-channel statistics (the values torchvision
# documents for its ImageNet-pretrained models). Only the training pipeline
# adds random augmentation.
input_size = (224, 224)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

train_steps = [
    transforms.ToPILImage(),
    transforms.Resize(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
train_transform = transforms.Compose(train_steps)

val_steps = [
    transforms.ToPILImage(),
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
val_transform = transforms.Compose(val_steps)

# Create the dataset
compcars_root = "archive"  # CompCars root directory
full_dataset = CompCarsDataset(compcars_root, transform=train_transform)

print(f"Nombre total de classes: {len(full_dataset.class_to_idx)}")
print(f"Nombre total d'images: {len(full_dataset)}")

# Train/val split (80/20). The generator is seeded so the same images land in
# the same subset on every run, which keeps validation results comparable.
SPLIT_SEED = 42
val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_split = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Both halves of random_split wrap the SAME dataset object and would therefore
# share train_transform, so validation images would be randomly augmented.
# A shallow copy shares the indexed paths/labels but carries val_transform.
eval_dataset = copy.copy(full_dataset)
eval_dataset.transform = val_transform
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)

# DataLoaders
# NOTE(review): on Windows, DataLoader workers are spawned as fresh processes
# and cannot import a dataset class defined in a notebook cell, which commonly
# fails or hangs; loading stays in the main process there. Confirm on the
# target machine and raise NUM_WORKERS if the class is moved to a module.
NUM_WORKERS = 0 if os.name == "nt" else 2
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS)

# Model: a pretrained ResNet-18 whose final fully-connected layer is replaced
# by a fresh linear head with one output per class.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=`; left unchanged because the installed version is not visible.
num_classes = len(full_dataset.class_to_idx)
model = models.resnet18(pretrained=True)
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)
model = model.to(device)

# Training setup: cross-entropy loss, Adam over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 10  # Start with fewer epochs while testing

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    seen_samples = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        batch_len = inputs.size(0)
        # The criterion returns the batch mean; weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * batch_len
        # .item() keeps the counters as plain Python numbers, so the epoch
        # summary below also works when the loader yields no batch at all.
        running_corrects += (preds == labels).sum().item()
        seen_samples += batch_len

    # Normalise by the samples actually processed rather than by a global
    # computed elsewhere; max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(seen_samples, 1)
    epoch_acc = running_corrects / max(seen_samples, 1)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

# Evaluation: run the model over the validation loader without gradient
# tracking and collect the predicted and true class indices of every sample.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_inputs, batch_labels in val_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_inputs)
        # Index of the highest logit per row is the predicted class.
        batch_preds = torch.argmax(logits, dim=1)
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds)
# Create the axes explicitly and hand them to the display: without `ax=`,
# ConfusionMatrixDisplay.plot opens its own figure, which leaves an empty
# 12x10 figure behind and ignores the requested size.
fig, ax = plt.subplots(figsize=(12, 10))
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
# Per-cell count annotations are only drawn for small matrices; with hundreds
# of classes they mean millions of text objects and rendering becomes
# impractically slow.
disp.plot(ax=ax, xticks_rotation=90, include_values=cm.shape[0] <= 30)
ax.set_title("Matrice de confusion")
plt.show()

# Helper turning a class index into a human-readable name
def get_readable_class_name(class_id):
    """Return ``"Make_<make_id>_Model_<model_id>"`` for class index ``class_id``.

    The internal class name is looked up in ``full_dataset.idx_to_class``, the
    substrings ``make`` and ``model`` are removed from it, and what remains is
    split on ``_`` into exactly two parts: the make id and the model id.
    """
    internal_name = full_dataset.idx_to_class[class_id]
    without_make = internal_name.replace('make', '')
    ids_only = without_make.replace('model', '')
    make_id, model_id = ids_only.split('_')
    return f"Make_{make_id}_Model_{model_id}"

# Show the first few validation predictions (at most ten), each flagged as
# right or wrong.
print("\nQuelques prédictions:")
for true_idx, pred_idx in zip(all_labels[:10], all_preds[:10]):
    true_class = get_readable_class_name(true_idx)
    pred_class = get_readable_class_name(pred_idx)
    if pred_idx == true_idx:
        correct = "✓"
    else:
        correct = "✗"
    print(f"{correct} True: {true_class}, Pred: {pred_class}")

Using device: cuda
Construction du dataset CompCars...
Dataset construit: 136726 images, 1716 classes
Nombre total de classes: 1716
Nombre total d'images: 136726




KeyboardInterrupt: 