In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset, random_split, WeightedRandomSampler
from torchvision.datasets import ImageFolder
from tqdm import tqdm
import numpy as np
import os
from google.colab import drive
from collections import Counter
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/MyDrive/TR_DIMA/Logit_compensation/')
from logitadjust import LogitAdjust



# Path to the dataset and model
path_train_dataset = "/content/drive/MyDrive/TR_DIMA/training_set_reduit"

# Transformations for the dataset
base_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert images to PyTorch tensors
])

# Data augmentation

augmented_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.GaussianBlur(kernel_size=9, sigma=(0.01, 5)),
        transforms.RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio = (0.2,5)),
        transforms.ToTensor(),
    ])









Mounted at /content/drive


In [None]:
# Load the training dataset

dataset_init = datasets.ImageFolder(root=path_train_dataset, transform=augmented_transform)



train_size = int(0.8 * len(dataset_init))
valid_size = len(dataset_init) - train_size

train_dataset, valid_dataset = random_split(dataset_init, [train_size, valid_size])

# Create DataLoaders for training and validation datasets

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=12, pin_memory=True)
val_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False, num_workers=12, pin_memory=True)

valid_classes = dataset_init.classes

In [None]:
# Number of elements in each class

num_all = Counter(dataset_init.targets)
distrib = [num_all[i] for i in range(0,13)]

In [None]:
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(valid_classes))

# Device config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device
model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 197MB/s]


In [None]:
# Training function for the first step of decoupling framework

num_epochs = 100
patience = 5
early_stopping_counter = 0
best_val_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", leave=False)

    for inputs, labels in train_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        train_bar.set_postfix(loss=loss.item())

    train_loss = running_loss / len(train_loader.dataset)

    # Validation
    model.eval()
    val_loss = 0.0
    val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", leave=False)

    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            val_bar.set_postfix(loss=loss.item())

    val_loss = val_loss / len(val_loader.dataset)

    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "/content/drive/MyDrive/TR_DIMA/Entrainement/best_model_pre_classifier_training.pth")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping déclenché.")
            break





KeyboardInterrupt: 

On charge l'ancien modèle et on créé un nouveau sampler equilibré

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet50(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, len(valid_classes))
model.to(device)


# Charger le meilleur modèle
model.load_state_dict(torch.load("/content/drive/MyDrive/TR_DIMA/Entrainement/best_model_pre_classifier_training.pth"))

#Second stage of training

train_indices = train_dataset.indices
train_labels = [dataset_init.samples[i][1] for i in train_indices]

class_counts = Counter(train_labels)
class_weights = {label: 1.0 / count for label, count in class_counts.items()}
sample_weights = [class_weights[label] for label in train_labels]

sampler = WeightedRandomSampler(weights=sample_weights, num_samples= 4*len(sample_weights), replacement=True)




[0.012046444248795023, 0.006100056303779527, 0.006100056303779527, 0.03641785203646149, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.012046444248795023, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.012046444248795023, 0.012046444248795023, 0.006100056303779527, 0.006100056303779527, 0.012046444248795023, 0.012046444248795023, 0.012046444248795023, 0.006100056303779527, 0.012046444248795023, 0.09853292781642932, 0.006100056303779527, 0.012046444248795023, 0.012046444248795023, 0.006100056303779527, 0.006100056303779527, 0.012046444248795023, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.006100056303779527, 0.012046444248795023, 0.006100056303779527, 0.006100056

In [None]:
# Create a new DataLoader with the sampler

train_loader = DataLoader(train_dataset, batch_size=128, sampler=sampler, num_workers=16, pin_memory=True)

# Reset the model's fully connected layer for the second stage of training

model.fc.reset_parameters()


# Freeze all layers except the last fully connected layer
for param in model.parameters():
    param.requires_grad = False

for param in model.fc.parameters():
    param.requires_grad = True

# Define the loss function and optimizer for the second stage
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-5, weight_decay=1e-4)



In [None]:
num_epochs = 100
patience = 5
early_stopping_counter = 0
best_val_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", leave=False)

    for inputs, labels in train_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        train_bar.set_postfix(loss=loss.item())

    train_loss = running_loss / len(train_loader.dataset)

    # Validation
    model.eval()
    val_loss = 0.0
    val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", leave=False)

    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            val_bar.set_postfix(loss=loss.item())

    val_loss = val_loss / len(val_loader.dataset)

    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "/content/drive/MyDrive/TR_DIMA/Entrainement/best_model_post_classifier_training.pth")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping déclenché.")
            break






Epoch 1: Train Loss = 3.9165, Val Loss = 0.2556




Epoch 2: Train Loss = 1.5204, Val Loss = 0.1565




Epoch 3: Train Loss = 1.1512, Val Loss = 0.1340




Epoch 4: Train Loss = 1.0086, Val Loss = 0.1194




Epoch 5: Train Loss = 0.9350, Val Loss = 0.1137




Epoch 6: Train Loss = 0.8908, Val Loss = 0.1117




Epoch 7: Train Loss = 0.8605, Val Loss = 0.1141




Epoch 8: Train Loss = 0.8514, Val Loss = 0.1097




Epoch 9: Train Loss = 0.8212, Val Loss = 0.1061




Epoch 10: Train Loss = 0.8175, Val Loss = 0.1044




Epoch 11: Train Loss = 0.8004, Val Loss = 0.1139




Epoch 12: Train Loss = 0.8086, Val Loss = 0.1043




Epoch 13: Train Loss = 0.7836, Val Loss = 0.1070




Epoch 14: Train Loss = 0.7779, Val Loss = 0.1068




Epoch 15: Train Loss = 0.7920, Val Loss = 0.0968




Epoch 16: Train Loss = 0.7702, Val Loss = 0.1073




Epoch 17: Train Loss = 0.7766, Val Loss = 0.1090




Epoch 18: Train Loss = 0.7648, Val Loss = 0.1036




Epoch 19: Train Loss = 0.7559, Val Loss = 0.1057


                                                                               

Epoch 20: Train Loss = 0.7430, Val Loss = 0.1008
Early stopping déclenché.


