In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

import os
import pandas as pd
from PIL import Image
import torch
from torchvision import transforms

from matplotlib import pyplot as plt

In [2]:


# Ton dossier de data (depuis CNN_simple.ipynb)
DATA_PATH = "../cartoonset10k"

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

data_pairs = []   # (image_tensor, hair_color_index)

for file in os.listdir(DATA_PATH):
    if file.endswith(".csv"):
        csv_path = os.path.join(DATA_PATH, file)
        img_path = csv_path.replace(".csv", ".png")

        if not os.path.exists(img_path):
            continue

        # 1) Charger l'image
        img = Image.open(img_path).convert("RGB")
        img = transform(img)

        # 2) Charger le CSV SANS header
        df = pd.read_csv(csv_path, header=None)

        # 3) Nettoyer la colonne des noms d'attributs
        df[0] = df[0].str.replace('"', '')

        # 4) Récupérer la ligne correspondant à hair_color
        hair_row = df[df[0] == "hair_color"]
        if len(hair_row) == 0:
            continue  # au cas où

        hair_color_value = int(hair_row.iloc[0, 1])   # 2ème colonne = valeur du label

        data_pairs.append((img, hair_color_value))

print("Nombre de (image, hair_color) :", len(data_pairs))
print("Exemple y :", data_pairs[0][1])
print("Shape image x :", data_pairs[0][0].shape)


Nombre de (image, hair_color) : 10000
Exemple y : 2
Shape image x : torch.Size([3, 64, 64])


In [3]:
# =========================
# 1) Dataset & DataLoader
# =========================

class HairColorDataset(Dataset):
    def __init__(self, data_pairs):
        self.images = [x for x, _ in data_pairs]
        self.labels = [int(y) for _, y in data_pairs]   # 0..9 normalement

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        x = self.images[idx]
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y

full_dataset = HairColorDataset(data_pairs)

# split train / val (80% / 20%)
train_size = int(0.8 * len(full_dataset))
val_size   = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

BATCH_SIZE = 64
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False)

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class HairColorMLP(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(3 * 64 * 64, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten (B, 12288)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# nombre de classes
all_labels = [y for _, y in data_pairs]
num_classes = max(all_labels) + 1

model = HairColorMLP(num_classes=num_classes).to(device)


In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [8]:
from tqdm import tqdm

EPOCHS = 50

for epoch in range(EPOCHS):
    # ---- TRAIN ----
    model.train()
    train_loss = 0.0

    for imgs, labels in tqdm(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * imgs.size(0)

    train_loss /= len(train_dataset)

    # ---- VALID ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)

            outputs = model(imgs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * imgs.size(0)

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    val_loss /= len(val_dataset)
    val_acc = correct / total * 100

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss:{train_loss:.4f} | "
          f"Val Loss:{val_loss:.4f} | Val Acc:{val_acc:.2f}%")


100%|██████████| 125/125 [00:04<00:00, 28.94it/s]


Epoch 1/50 | Train Loss:1.6629 | Val Loss:1.2514 | Val Acc:46.45%


100%|██████████| 125/125 [00:03<00:00, 34.03it/s]


Epoch 2/50 | Train Loss:1.5178 | Val Loss:1.2805 | Val Acc:35.80%


100%|██████████| 125/125 [00:03<00:00, 32.76it/s]


Epoch 3/50 | Train Loss:1.5762 | Val Loss:1.1447 | Val Acc:53.40%


100%|██████████| 125/125 [00:03<00:00, 33.52it/s]


Epoch 4/50 | Train Loss:1.5135 | Val Loss:1.4923 | Val Acc:30.50%


100%|██████████| 125/125 [00:03<00:00, 35.06it/s]


Epoch 5/50 | Train Loss:1.5149 | Val Loss:1.3265 | Val Acc:35.10%


100%|██████████| 125/125 [00:03<00:00, 32.98it/s]


Epoch 6/50 | Train Loss:1.4654 | Val Loss:1.1136 | Val Acc:47.85%


100%|██████████| 125/125 [00:04<00:00, 26.66it/s]


Epoch 7/50 | Train Loss:1.4588 | Val Loss:1.2450 | Val Acc:29.95%


100%|██████████| 125/125 [00:03<00:00, 32.49it/s]


Epoch 8/50 | Train Loss:1.4636 | Val Loss:1.3305 | Val Acc:32.05%


100%|██████████| 125/125 [00:03<00:00, 34.44it/s]


Epoch 9/50 | Train Loss:1.4733 | Val Loss:1.4337 | Val Acc:30.75%


100%|██████████| 125/125 [00:04<00:00, 28.54it/s]


Epoch 10/50 | Train Loss:1.4649 | Val Loss:1.3963 | Val Acc:28.80%


100%|██████████| 125/125 [00:04<00:00, 30.68it/s]


Epoch 11/50 | Train Loss:1.4376 | Val Loss:1.1970 | Val Acc:39.15%


100%|██████████| 125/125 [00:04<00:00, 27.36it/s]


Epoch 12/50 | Train Loss:1.4298 | Val Loss:1.3892 | Val Acc:25.25%


100%|██████████| 125/125 [00:04<00:00, 25.43it/s]


Epoch 13/50 | Train Loss:1.4274 | Val Loss:1.4004 | Val Acc:32.65%


100%|██████████| 125/125 [00:05<00:00, 24.64it/s]


Epoch 14/50 | Train Loss:1.4521 | Val Loss:1.4285 | Val Acc:40.70%


100%|██████████| 125/125 [00:05<00:00, 23.77it/s]


Epoch 15/50 | Train Loss:1.4526 | Val Loss:1.5306 | Val Acc:28.85%


100%|██████████| 125/125 [00:04<00:00, 30.99it/s]


Epoch 16/50 | Train Loss:1.4468 | Val Loss:1.4250 | Val Acc:33.35%


100%|██████████| 125/125 [00:03<00:00, 31.53it/s]


Epoch 17/50 | Train Loss:1.3740 | Val Loss:1.3581 | Val Acc:31.55%


100%|██████████| 125/125 [00:04<00:00, 31.06it/s]


Epoch 18/50 | Train Loss:1.3652 | Val Loss:1.3674 | Val Acc:30.50%


100%|██████████| 125/125 [00:04<00:00, 27.88it/s]


Epoch 19/50 | Train Loss:1.3863 | Val Loss:1.5955 | Val Acc:35.55%


100%|██████████| 125/125 [00:06<00:00, 20.44it/s]


Epoch 20/50 | Train Loss:1.3696 | Val Loss:1.6317 | Val Acc:22.25%


100%|██████████| 125/125 [00:04<00:00, 27.56it/s]


Epoch 21/50 | Train Loss:1.3566 | Val Loss:1.4909 | Val Acc:27.50%


100%|██████████| 125/125 [00:04<00:00, 31.24it/s]


Epoch 22/50 | Train Loss:1.3647 | Val Loss:1.8950 | Val Acc:25.35%


100%|██████████| 125/125 [00:04<00:00, 28.61it/s]


Epoch 23/50 | Train Loss:1.3649 | Val Loss:1.4818 | Val Acc:38.35%


100%|██████████| 125/125 [00:04<00:00, 26.48it/s]


Epoch 24/50 | Train Loss:1.3436 | Val Loss:1.7487 | Val Acc:20.85%


100%|██████████| 125/125 [00:04<00:00, 27.90it/s]


Epoch 25/50 | Train Loss:1.4210 | Val Loss:1.7662 | Val Acc:30.25%


100%|██████████| 125/125 [00:03<00:00, 34.18it/s]


Epoch 26/50 | Train Loss:1.3574 | Val Loss:1.4458 | Val Acc:29.20%


100%|██████████| 125/125 [00:04<00:00, 30.91it/s]


Epoch 27/50 | Train Loss:1.3005 | Val Loss:1.5004 | Val Acc:28.40%


100%|██████████| 125/125 [00:04<00:00, 28.09it/s]


Epoch 28/50 | Train Loss:1.3634 | Val Loss:1.6129 | Val Acc:42.85%


100%|██████████| 125/125 [00:03<00:00, 31.33it/s]


Epoch 29/50 | Train Loss:1.4819 | Val Loss:1.5395 | Val Acc:30.95%


100%|██████████| 125/125 [00:04<00:00, 29.30it/s]


Epoch 30/50 | Train Loss:1.4985 | Val Loss:1.6808 | Val Acc:36.30%


100%|██████████| 125/125 [00:04<00:00, 27.06it/s]


Epoch 31/50 | Train Loss:1.4325 | Val Loss:1.6361 | Val Acc:35.10%


100%|██████████| 125/125 [00:04<00:00, 25.07it/s]


Epoch 32/50 | Train Loss:1.4755 | Val Loss:1.8244 | Val Acc:25.90%


100%|██████████| 125/125 [00:03<00:00, 31.27it/s]


Epoch 33/50 | Train Loss:1.4623 | Val Loss:1.6620 | Val Acc:35.70%


100%|██████████| 125/125 [00:03<00:00, 31.34it/s]


Epoch 34/50 | Train Loss:1.4400 | Val Loss:2.1338 | Val Acc:26.10%


100%|██████████| 125/125 [00:04<00:00, 26.26it/s]


Epoch 35/50 | Train Loss:1.4382 | Val Loss:1.8525 | Val Acc:27.25%


100%|██████████| 125/125 [00:03<00:00, 32.16it/s]


Epoch 36/50 | Train Loss:1.4444 | Val Loss:1.7503 | Val Acc:27.80%


100%|██████████| 125/125 [00:03<00:00, 31.55it/s]


Epoch 37/50 | Train Loss:1.4574 | Val Loss:1.6008 | Val Acc:31.60%


100%|██████████| 125/125 [00:03<00:00, 31.83it/s]


Epoch 38/50 | Train Loss:1.4106 | Val Loss:1.6467 | Val Acc:33.05%


100%|██████████| 125/125 [00:03<00:00, 31.92it/s]


Epoch 39/50 | Train Loss:1.4326 | Val Loss:1.5674 | Val Acc:31.10%


100%|██████████| 125/125 [00:03<00:00, 32.56it/s]


Epoch 40/50 | Train Loss:1.4640 | Val Loss:1.9479 | Val Acc:27.45%


100%|██████████| 125/125 [00:03<00:00, 31.77it/s]


Epoch 41/50 | Train Loss:1.4323 | Val Loss:1.8483 | Val Acc:31.10%


100%|██████████| 125/125 [00:03<00:00, 32.23it/s]


Epoch 42/50 | Train Loss:1.4889 | Val Loss:1.4595 | Val Acc:40.70%


100%|██████████| 125/125 [00:04<00:00, 30.43it/s]


Epoch 43/50 | Train Loss:1.3922 | Val Loss:1.5448 | Val Acc:39.85%


100%|██████████| 125/125 [00:04<00:00, 28.24it/s]


Epoch 44/50 | Train Loss:1.3691 | Val Loss:1.7484 | Val Acc:30.40%


100%|██████████| 125/125 [00:04<00:00, 29.88it/s]


Epoch 45/50 | Train Loss:1.3969 | Val Loss:1.7323 | Val Acc:35.80%


100%|██████████| 125/125 [00:04<00:00, 25.65it/s]


Epoch 46/50 | Train Loss:1.3928 | Val Loss:1.6394 | Val Acc:36.60%


100%|██████████| 125/125 [00:04<00:00, 29.47it/s]


Epoch 47/50 | Train Loss:1.5208 | Val Loss:1.7306 | Val Acc:29.20%


100%|██████████| 125/125 [00:03<00:00, 32.07it/s]


Epoch 48/50 | Train Loss:1.4600 | Val Loss:1.7637 | Val Acc:31.85%


100%|██████████| 125/125 [00:04<00:00, 31.02it/s]


Epoch 49/50 | Train Loss:1.4136 | Val Loss:1.8788 | Val Acc:27.55%


100%|██████████| 125/125 [00:03<00:00, 32.14it/s]


Epoch 50/50 | Train Loss:1.3801 | Val Loss:1.3802 | Val Acc:39.35%


In [9]:
model.eval()
img, true_label = full_dataset[0]
with torch.no_grad():
    pred = torch.argmax(model(img.unsqueeze(0).to(device))).item()

print("True label:", true_label.item())
print("Predicted :", pred)

True label: 2
Predicted : 2


In [10]:
# ======================
#  TEST ACCURACY
# ======================
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in val_loader:   # ou test_loader si tu en as un !
        imgs = imgs.to(device)
        labels = labels.to(device)

        outputs = model(imgs)          # logits
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_acc = correct / total * 100
print(f"Test Accuracy: {test_acc:.2f}%")


Test Accuracy: 39.35%


---

---

---

In [11]:
# extraire X et y en numpy
X = torch.stack([img.view(-1) for img, _ in data_pairs]).numpy()   # flatten
y = torch.tensor([y for _, y in data_pairs]).numpy()

from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 1) logistic regression
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
print("Logistic Regression Acc:", clf.score(X_val, y_val))

# 2) SVM (plus puissant)
from sklearn.svm import SVC
svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)
print("SVM Acc:", svm.score(X_val, y_val))


Logistic Regression Acc: 0.98
SVM Acc: 0.959
