In [None]:
from utils import FilteredImageFolder, count_trainable_parameters, count_elements_per_class
from torchvision import transforms, datasets
from torchvision.models import vgg16
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm
import numpy as np
import random
import torch
import os

# Seed every RNG the notebook draws from so that "Restart & Run All" is repeatable.
manualSeed = 999
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
# NumPy has its own global RNG; the image-preview cell samples indices with
# np.random, so it must be seeded as well.
np.random.seed(manualSeed)
torch.manual_seed(manualSeed)
# Deterministic mode makes cuBLAS raise a RuntimeError on CUDA >= 10.2 unless
# this variable is set before the first cuBLAS call. setdefault keeps any value
# the user has already exported.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
torch.use_deterministic_algorithms(True)
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"running on {device}")

# Hyperparameters

In [None]:
# Tunable run settings, grouped so a run can be reconfigured in one place.
batch_size = 16  # samples per mini-batch
learning_rate = 1e-3  # optimizer step size
num_epochs = 1  # full passes over the training split

# Datasets

In [None]:
# Dataset root. The FRUITS360_DATA_DIR environment variable overrides the
# original machine-specific location so the notebook can run elsewhere; when it
# is unset the previous hard-coded path is used unchanged.
data_dir = os.environ.get(
    "FRUITS360_DATA_DIR",
    r"C:\Users\yaoko\Downloads\archive\fruits-360-original-size\fruits-360-original-size",
)
if not os.path.isdir(data_dir):
    raise FileNotFoundError(
        f"Dataset root not found: {data_dir!r}. Set FRUITS360_DATA_DIR to the "
        "folder that contains the Training/Validation/Test sub-directories."
    )

# Sub-directory name of each split under data_dir.
TRAIN = "Training"
VAL = "Validation"
TEST = "Test"
SPLITS = [TRAIN, VAL, TEST]

# Every split uses the same preprocessing: resize to 224x224, then convert to a
# tensor. Each split still gets its own Compose instance.
# NOTE(review): torchvision's pretrained VGG16 weights are documented as expecting
# ImageNet mean/std normalization, which is not applied here. Adding
# transforms.Normalize would require de-normalizing in the image-preview cells.
data_transforms = {
    split: transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]
    )
    for split in SPLITS
}

# Only these class folders are requested from each split.
classes_to_include = [
    "apple_6",
    "apple_braeburn_1",
    "apple_crimson_snow_1",
]

# FilteredImageFolder comes from the local utils module; presumably it behaves
# like torchvision's ImageFolder restricted to classes_to_include (confirm in utils).
image_datasets = {
    split: FilteredImageFolder(
        root=os.path.join(data_dir, split),
        classes_to_include=classes_to_include,
        transform=data_transforms[split],
    )
    for split in SPLITS
}

# One shuffled loader per split, each using four worker processes.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split], batch_size=batch_size, shuffle=True, num_workers=4
    )
    for split in SPLITS
}

dataset_sizes = {split: len(image_datasets[split]) for split in SPLITS}
for split in SPLITS:
    print("Loaded {} images under {}".format(dataset_sizes[split], split))

# Class names are taken from the training split.
class_names = image_datasets[TRAIN].classes
n_classes = len(class_names)

In [None]:
# Class-balance report: a dashed rule and the split name, then one line per class.
for x in (TRAIN, VAL, TEST):
    print("-" * 30, x, sep="\n")
    class_counts = count_elements_per_class(image_datasets[x])
    for class_name, count in class_counts.items():
        print(f"\t{class_name} => : {count}")

In [None]:
# Preview randomly drawn training images in a 4x5 grid, each titled with its class name.
num_images_to_display = 20
dataset = image_datasets[TRAIN]
# Indices are drawn with replacement, so the same image can appear more than once.
sample_indices = np.random.randint(
    low=0, high=len(dataset), size=num_images_to_display
)
fig, axes = plt.subplots(4, 5, figsize=(15, 8))
# axes.flat walks the grid row by row, matching the (i // 5, i % 5) layout.
for ax, sample_index in zip(axes.flat, sample_indices):
    image, label = dataset[sample_index]
    # Move the channel axis last (C, H, W -> H, W, C) for imshow.
    ax.imshow(image.permute(1, 2, 0).numpy())
    ax.axis("off")
    ax.set_title(class_names[label])
plt.show()

# Model

In [None]:
# Start from VGG16 with torchvision's default pretrained weights.
model = vgg16(weights="DEFAULT")
# count_trainable_parameters comes from the local utils module; presumably it
# counts parameters with requires_grad=True (confirm in utils).
n1 = count_trainable_parameters(model)
print("VGG16 number of trainable parameters", f"{n1:,d}")
# Freeze every pretrained layer. Autograd reads `requires_grad`; the previous
# spelling `require_grad` only attached an unused attribute to each tensor, so
# nothing was frozen and the whole network was being fine-tuned.
for param in model.parameters():
    param.requires_grad = False
# Swap the last classifier layer for a new head sized to this dataset. Newly
# created layers require gradients by default, so only this head is trained.
# LogSoftmax outputs log-probabilities over the classes.
n_inputs = model.classifier[6].in_features
model.classifier[6] = nn.Sequential(
    nn.Linear(n_inputs, n_classes), nn.LogSoftmax(dim=1)
)
n2 = count_trainable_parameters(model)
print("Custom model number of trainable parameters", f"{n2:,d}")
# Difference between the two counts printed above.
print(f"{(n1-n2):,d}")
print(model.classifier)

# Training

In [None]:
# Negative log-likelihood loss; it expects log-probabilities as input.
criterion = nn.NLLLoss()
# The batches are moved to `device` inside the loop, so the model weights must
# be there too, otherwise the forward pass fails on CUDA machines. This is a
# no-op when the model is already on that device. It is done before the
# optimizer is built, as the PyTorch docs recommend.
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
train_losses = []  # mean batch loss of each epoch, consumed by the loss plot
dataloader = dataloaders[TRAIN]
n_batch = len(dataloader)
print_nbatch_step = 10  # report the running average every this many batches
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    print(f"Epoch {epoch+1}...")
    batch_loss = 0.0  # loss accumulated since the last progress report
    for batch_idx, batch_data in enumerate(tqdm(dataloader, total=n_batch)):
        images = batch_data[0].to(device)
        labels = batch_data[1].to(device)
        # Reset the gradients accumulated by the previous step
        optimizer.zero_grad()
        # Forward pass
        predictions = model(images)
        # Compute the loss
        loss = criterion(predictions, labels)
        # Backpropagation
        loss.backward()
        # Update the weights
        optimizer.step()
        step_loss = loss.item()
        epoch_loss += step_loss
        batch_loss += step_loss
        if (batch_idx + 1) % print_nbatch_step == 0:
            print(f"  Batch [{batch_idx+1}/{n_batch}], Avg Loss: {batch_loss/print_nbatch_step:.4f}")
            batch_loss = 0.0
    # Average of the per-batch mean losses; max() avoids a ZeroDivisionError
    # when the loader yields no batches.
    epoch_loss /= max(n_batch, 1)
    train_losses.append(epoch_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], loss: {epoch_loss:.4f}")

In [None]:
# Training-loss curve: one point per epoch, using the explicit figure/axes interface.
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
epoch_numbers = range(1, num_epochs + 1)
loss_ax.plot(epoch_numbers, train_losses, marker='o', linestyle='-', color='b')
loss_ax.set(xlabel='Epoch', ylabel='Loss', title='Training Loss per Epoch')
loss_ax.grid(True)
plt.show()

# Test

In [None]:
# Score the model on the test split: mean of the per-batch losses and top-1 accuracy.
model.eval()
test_losses = []
correct = 0
total = 0
# Gradients are not needed for evaluation.
with torch.no_grad():
    for batch_data in tqdm(dataloaders[TEST]):
        images, labels = batch_data[0].to(device), batch_data[1].to(device)
        predictions = model(images)
        loss = criterion(predictions, labels)
        test_losses.append(loss.item())
        # The predicted class is the index of the highest score in each row.
        predicted = predictions.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

average_test_loss = np.mean(test_losses)
print(f"Average Test Loss: {average_test_loss:.4f}")
accuracy = 100 * correct / total
print(f"Accuracy on Test Data: {accuracy:.2f}%")

In [None]:
# Show one test batch with predicted vs. true class; the title is green when
# they match and red otherwise.
test_loader = dataloaders[TEST]
images, labels = next(iter(test_loader))
dataset = image_datasets[TEST]
model.eval()
# Run the forward pass on whatever device the model currently lives on. The
# batch itself is left where the loader put it, for plotting.
model_device = next(model.parameters()).device
with torch.no_grad():
    output = model(images.to(model_device))
predictions = output.argmax(dim=1).cpu()

fig, axes = plt.subplots(5, 3, figsize=(15, 8))
# Hide every frame up front so cells left over from a short batch stay blank.
for ax in axes.flat:
    ax.axis("off")
# zip stops at the shorter of the grid (15 cells) and the batch.
for ax, image, prediction, target in zip(
    axes.flat, images, predictions.tolist(), labels.tolist()
):
    # Move the channel axis last (C, H, W -> H, W, C) for imshow.
    ax.imshow(image.cpu().permute(1, 2, 0).numpy())
    ax.set_title(f"pred={class_names[prediction]}, real={class_names[target]}", color='green' if prediction == target else 'red')
plt.show()