# LIBRARIES

In [None]:
import torch
import json
import os
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import scipy
import collections
from PIL import Image
import pickle
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.mobilenetv2 import Conv2dNormActivation, InvertedResidual
import torch.optim as optim
import itertools
from torch.optim.lr_scheduler import CosineAnnealingLR
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score

# CUDA

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, else the CPU
train_on_gpu = torch.cuda.is_available()
print(
    'CUDA is available!  Training on GPU ...'
    if train_on_gpu
    else 'CUDA is not available.  Training on CPU ...'
)

device_name = "cuda:0" if train_on_gpu else "cpu"
device = torch.device(device_name)

print(device)

# IMPORT THE DATASET

In [None]:
'''

This part of the code serves the purpose of dowloading
the dataset in a proper way for kaggle. A file json is created
to download the dataset from kaggle.

'''

# Create file kaggle.json to download from Kaggle
kaggle_json = {
    "username": "enricomarta",
    "key": "84d386ad7a7c02ed584cae00085f289b"
}

# Path of the kaggle.json
kaggle_json_path = '/kaggle/working/kaggle.json'

# Write on kaggle.json
with open(kaggle_json_path, 'w') as file:
    json.dump(kaggle_json, file)

# Kaggle's enviornment
os.environ['KAGGLE_CONFIG_DIR'] = '/kaggle/working'

# Correctness of the information
!chmod 600 /kaggle/working/kaggle.json

print("Downloading the dataset from Kaggle...")
!kaggle competitions download -c ifood-2019-fgvc6 -p /kaggle/working --force

In [None]:
# Unzip the competition archive first, then train_set.zip (presumably one of
# the files extracted from it — verify against the archive content).
# -o overwrites existing files without prompting, -d selects the output
# directory, and the listing printed by unzip is discarded via /dev/null.
!unzip -o ifood-2019-fgvc6.zip -d /kaggle/working > /dev/null
!unzip -o train_set.zip -d /kaggle/working > /dev/null

# List the current directory to check what has been extracted
!ls

In [None]:
# Remove the files that are not necessary for the project: the downloaded
# archives, the class list, the sample submissions and val_labels.csv
!rm class_list.txt ifood-2019-fgvc6.zip sample_submission.csv ifood2019_sample_submission.csv test_set.zip train_set.zip val_labels.csv val_set.zip

# PRE-PROCESSING

In [None]:
'''
Here the training and validation sets are created and saved.
Then the number of images contained in each of them is printed.
'''

# Path of the CSV file with the labels of the whole training set
file_path = '/kaggle/working/train_labels.csv'
data = pd.read_csv(file_path)

# Split the data into training and validation sets (30% for validation),
# stratified on the label so that both splits keep the class proportions
train_data, validation_data = train_test_split(
    data, test_size=0.3, stratify=data['label'], random_state=42
)

# Save both splits with absolute paths: the following cells read them back
# from /kaggle/working, so the files must not depend on the current directory
validation_data.to_csv('/kaggle/working/validation_set.csv', index=False)
train_data.to_csv('/kaggle/working/training_set.csv', index=False)

num_train_rows = train_data.shape[0]
num_validation_rows = validation_data.shape[0]
print(f"Number of rows in the training set: {num_train_rows}")
print(f"Number of rows in the validation set: {num_validation_rows}")

In [None]:
'''
Here the 'final dataset' is created: classes with fewer than
MIN_IMAGES_PER_CLASS training images are padded with randomly duplicated
rows until they reach that size. The result is then saved to a new CSV file.
'''

# Minimum number of rows that every class must have after the augmentation
MIN_IMAGES_PER_CLASS = 300

# Path of the CSV file
file_path = '/kaggle/working/training_set.csv'

# Load the CSV file
data = pd.read_csv(file_path)

# Number of rows
num_rows = data.shape[0]

# Number of labels
num_labels = data['label'].nunique()

# Number of observations for each label
images_per_label = data['label'].value_counts()

# Classes with fewer than MIN_IMAGES_PER_CLASS observations
classes_to_duplicate = images_per_label[images_per_label < MIN_IMAGES_PER_CLASS].index

# One DataFrame (original rows + duplicates) per under-represented class
duplicated_data = []

for label in classes_to_duplicate:
    rows_to_duplicate = data[data['label'] == label]
    # How many more rows are needed to reach the minimum
    count_needed = MIN_IMAGES_PER_CLASS - len(rows_to_duplicate)
    # Sample with replacement, since more rows than available may be needed
    duplicated_rows = rows_to_duplicate.sample(n=count_needed, replace=True, random_state=1)
    # Keep the original rows together with the duplicated ones
    duplicated_data.append(pd.concat([rows_to_duplicate, duplicated_rows], ignore_index=True))

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns when every class is already well represented
if duplicated_data:
    final_duplicated_data = pd.concat(duplicated_data, ignore_index=True)
else:
    final_duplicated_data = data.iloc[0:0]

# Well represented data: rows whose class was not selected for duplication
well_represented_data = data[~data['label'].isin(classes_to_duplicate)]

# Merge to obtain the final new dataframe
final_data = pd.concat([well_represented_data, final_duplicated_data], ignore_index=True)

# Create the new CSV file
final_file_path = '/kaggle/working/modified_train_labels.csv'
final_data.to_csv(final_file_path, index=False)

# Read the new training dataset back and report its size
data_modified = pd.read_csv(final_file_path)
num_rows_modified = len(data_modified)
print(f"Number of images in modified file: {num_rows_modified}")
images_per_label_mod = data_modified['label'].value_counts()
print("Number of images per label (ascending order):")
print(images_per_label_mod.sort_values().head(10))

In [None]:
'''
Here the class 'FoodDataset' is created.
It provides an appropriate way to manage images.
'''

class FoodDataset(Dataset):
    """Image dataset backed by the CSV files of the training/validation splits.

    Each sample is a dictionary holding the (optionally transformed) image
    under the key 'img_name' and the class index, as a float tensor, under
    the key 'labels'.
    """

    # Names of the supported splits
    SPLITS = ("train", "val")

    def __init__(self, root_dir, transform, split):
        """
        Args:
            root_dir (string): Directory containing the images.
            transform (callable, optional): Transformation to be applied to the
                images. When None, the RGB PIL image is returned unchanged.
            split (string): Split type ("train" or "val").

        Raises:
            ValueError: If `split` is not one of the supported splits.
        """
        if split not in self.SPLITS:
            raise ValueError(
                f"Unknown split {split!r}: expected one of {self.SPLITS}"
            )
        self.split = split
        self.root_dir = root_dir
        self.transform = transform
        # Only the requested split is loaded: every load checks the existence
        # of each image on disk, so loading the unused split is wasted work
        self.data = self.get_data_train() if split == "train" else self.get_data_val()

    def _load_split(self, csv_path):
        """Read `csv_path` and return the rows whose image exists on disk.

        The 'img_name' column is prefixed with `self.root_dir`, so the rows of
        the returned NumPy array contain paths that can be opened directly.
        """
        df = pd.read_csv(csv_path)
        # Build the image paths relative to root_dir
        df['img_name'] = self.root_dir + "/" + df['img_name']
        # Keep only the rows whose image file exists (one check per row);
        # astype(bool) keeps the mask boolean even when the file has no rows
        exists_mask = df['img_name'].map(os.path.exists).astype(bool)
        return df[exists_mask].to_numpy()

    def get_data_train(self):
        """Return the rows of the training split as a NumPy array."""
        # NOTE(review): the oversampled '/kaggle/working/modified_train_labels.csv'
        # written during pre-processing is not read here, so training uses the
        # non-augmented split — confirm this is intended.
        return self._load_split('/kaggle/working/training_set.csv')

    def get_data_val(self):
        """Return the rows of the validation split as a NumPy array."""
        return self._load_split('/kaggle/working/validation_set.csv')

    def __len__(self):
        """Return the number of samples of the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Retrieve the sample at index `idx`.

        Returns:
            dict: 'img_name' holds the image after `self.transform` (when one
            is set) and 'labels' holds the class index as a float tensor.
        """
        img_path, num_class = self.data[idx]
        # img_path already includes root_dir (see _load_split), so it is opened
        # as is; joining root_dir again only works when root_dir is absolute.
        # The image is converted to RGB so that grayscale, palette or RGBA
        # files also yield 3 channels, and the file is closed right after.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        sample = {'img_name': image, 'labels': torch.tensor(num_class).float()}
        return sample


In [None]:
'''
The data transformations are defined here,
distinguishing between training and validation sets.
Then the DataLoaders are created.
'''
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize images to 256x256
    transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip images horizontally with a probability of 50%
    # NOTE(review): shear is expressed in degrees, so 0.2 is an almost
    # imperceptible shear — confirm that this value is intended
    transforms.RandomAffine(degrees=0, shear=0.2),  # Apply random shear transformations
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0), ratio=(1.0, 1.0)),  # Apply random zoom transformations
    transforms.ToTensor(),  # Convert PIL images to PyTorch tensors
    transforms.Normalize(  # Normalize pixel values based on ImageNet statistics
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])
transform_val = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize images to 256x256 (consistent with training)
    transforms.ToTensor(),  # Convert PIL images to PyTorch tensors
    transforms.Normalize(  # Normalize pixel values using the same statistics
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


# Set batch size
bs = 100

# Create datasets for training and validation
trainset = FoodDataset("/kaggle/working/train_set", transform_train, split="train")
valset = FoodDataset("/kaggle/working/train_set", transform_val, split="val")

# Create data loaders for batch training and evaluation
trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=1, shuffle=False)

# Print the number of samples; the counts come from the datasets because
# len(trainloader) * bs over-counts whenever the last batch is not full
print(f"Number of training samples: {len(trainset)}")
print(f"Number of validation samples: {len(valset)}")

# CNN

In [None]:
'''
Here the architecture of the CNN is defined: a shared feature extractor
followed by two different classifiers (puzzle task and true task).
Helper functions for the SSL pretext task are also included.

'''


class Net(nn.Module):
    """Reduced MobileNetV2-style feature extractor shared by two classifiers.

    `classifier_puzzle` outputs 9 logits (one per cell of a 3x3 grid) and
    `classifier_true` outputs 251 logits; both read the same 800-dimensional
    pooled feature vector.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, stride, expand_ratio) of the inverted
        # residual blocks, listed in the order in which they are applied
        block_specs = [
            (32, 16, 1, 1),
            (16, 24, 2, 6),
            (24, 24, 1, 6),
            (24, 32, 2, 6),
            (32, 32, 1, 6),
            (32, 32, 1, 6),
            (32, 64, 2, 6),
            (64, 64, 1, 6),
            (64, 64, 1, 6),
            (64, 64, 1, 6),
            (64, 96, 1, 6),
            (96, 96, 2, 6),
            (96, 96, 2, 6),
        ]

        # Reduced version of MobileNetV2 (the original author's note targets
        # fewer than 1 million parameters): stem conv, blocks, 1x1 head conv
        layers = [Conv2dNormActivation(3, 32, kernel_size=3, stride=2, padding=1)]
        for in_ch, out_ch, stride, expand in block_specs:
            layers.append(InvertedResidual(in_ch, out_ch, stride=stride, expand_ratio=expand))
        layers.append(Conv2dNormActivation(96, 800, kernel_size=1, stride=1, padding=0))
        self.features = nn.Sequential(*layers)

        # The two classifiers: 9 outputs for the puzzle, 251 for the true task
        self.classifier_puzzle = self._create_classifier(9)
        self.classifier_true = self._create_classifier(251)

    def _create_classifier(self, num_classes):
        """Build an MLP head 800 -> 445 -> 32 -> num_classes with GELU in between."""
        widths = (800, 445, 32)
        head = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            head.append(nn.Linear(fan_in, fan_out))
            head.append(nn.GELU())
        head.append(nn.Linear(widths[-1], num_classes))
        return nn.Sequential(*head)

    def forward_shared(self, x):
        """Run the feature extractor and pool its output to one vector per input."""
        feature_maps = self.features(x)
        pooled = nn.functional.adaptive_avg_pool2d(feature_maps, (1, 1))
        return pooled.flatten(1)

    def forward_puzzle(self, x):
        """Return the puzzle classifier logits for `x`."""
        return self.classifier_puzzle(self.forward_shared(x))

    def forward_true(self, x):
        """Return the true classifier logits for `x`."""
        return self.classifier_true(self.forward_shared(x))

    def _preprocess(self, images):
        """
        Cut every image into a 3x3 grid of pieces and shuffle their order.

        The same random permutation is applied to every image of the batch.
        Rows/columns left over when the image size is not a multiple of 3
        are dropped.

        Parameters:
            images (Tensor): Batch of input images, [batch, channels, height, width].

        Returns:
            shuffled_pieces (Tensor): The pieces, flattened to
                [batch * 9, channels, piece_height, piece_width].
            shuffled_targets (Tensor): For every piece, the index (row-major,
                0..8) of the grid cell it was cut from, flattened to [batch * 9].
        """
        batch_size, channels, height, width = images.shape
        grid_size = 3
        num_pieces = grid_size * grid_size
        piece_height, piece_width = height // grid_size, width // grid_size

        # Cut the tiles in row-major order: tile k comes from cell divmod(k, 3)
        tiles = []
        for position in range(num_pieces):
            row, col = divmod(position, grid_size)
            top, left = row * piece_height, col * piece_width
            tiles.append(images[:, :, top:top + piece_height, left:left + piece_width])
        pieces = torch.stack(tiles, dim=1)  # [batch, num_pieces, channels, piece_h, piece_w]

        # Before shuffling, the pieces of each image are labelled 0..num_pieces-1
        targets = torch.arange(num_pieces).repeat(batch_size, 1)  # [batch, num_pieces]

        # One permutation, shared by the pieces and by their targets
        shuffled_indices = torch.randperm(num_pieces)
        shuffled_pieces = pieces[:, shuffled_indices, :, :, :]
        shuffled_targets = targets[:, shuffled_indices]

        # Merge the batch and piece dimensions
        return (
            shuffled_pieces.view(-1, channels, piece_height, piece_width),
            shuffled_targets.view(-1),
        )


# Instantiate the network and move its parameters to the selected device.
# As the last expression of the cell, net.to(device) also makes the notebook
# display the model structure.
net = Net()
net.to(device)


# TRAINING and EVALUATION

In [None]:
'''
Define the training parameters (epochs, loss function, optimizers and schedulers).
Three different optimizers are used: shared features, puzzle head, true head.
'''

epochs = 60  # Number of training epochs
criterion = nn.CrossEntropyLoss()  # Cross Entropy loss function for classification

# The schedulers are stepped once per batch, so the cosine period is
# expressed in iterations: batches per epoch times number of epochs
total_steps = len(trainloader) * epochs


def _adam_with_cosine(parameters):
    """Return an Adam optimizer (lr 0.001) with its cosine annealing scheduler (down to 1e-5)."""
    optimizer = optim.Adam(parameters, lr=0.001)
    scheduler = CosineAnnealingLR(optimizer, T_max=total_steps, eta_min=1e-5)
    return optimizer, scheduler


# Feature extractor shared by both tasks
optimizer_shared, scheduler_shared = _adam_with_cosine(net.features.parameters())
# Head of the puzzle (pretext) task
optimizer_puzzle, scheduler_puzzle = _adam_with_cosine(net.classifier_puzzle.parameters())
# Head of the true classification task
optimizer_true, scheduler_true = _adam_with_cosine(net.classifier_true.parameters())


In [None]:
'''
Training and Validation section.

Every epoch trains the puzzle (pretext) task and the true classification task
on each batch, then evaluates both tasks on the validation set. Checkpointing
and early stopping are driven by the puzzle validation loss.
'''

# Uncomment if you need to resume the training from a saved checkpoint
#net.load_state_dict(torch.load("/kaggle/input/vecchio/net_best.pth"))

# Parameters for the early stopping procedure
patience = 3
best_val_loss = float('inf')
epochs_no_improve = 0

# Predictions and labels of the most recent validation pass
# (used to evaluate the final model)
all_preds = []
all_labels = []
all_preds_true = []
all_labels_true = []

# Print the statistics about every 10% of the training batches; max() avoids
# a modulo by zero when the loader has fewer than 10 batches
log_interval = max(1, len(trainloader) // 10)

# Training
for epoch in range(epochs):

    running_loss = []  # Store puzzle's training loss for each batch
    running_loss_true = []  # Store true training loss for each batch
    net.train()

    for i, data in enumerate(trainloader):

        # Get inputs and labels from the data loader
        inputs = data["img_name"].to(device)
        labels = data["labels"].to(device).long()

        # ---- Puzzle task: updates the shared features and the puzzle head ----
        optimizer_shared.zero_grad()
        optimizer_puzzle.zero_grad()

        # Generation of the training images and labels for puzzle's task
        shuffled_pieces, targets = net._preprocess(inputs)
        shuffled_pieces, targets = shuffled_pieces.to(device), targets.to(device)

        # The logits are used as they are: squeezing them would drop the batch
        # dimension (and break the loss) when a batch holds a single sample
        outputs = net.forward_puzzle(shuffled_pieces)
        loss = criterion(outputs, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer_shared.step()
        optimizer_puzzle.step()

        # ---- True task: updates the true head ----
        optimizer_shared.zero_grad()
        optimizer_true.zero_grad()
        outputs_true = net.forward_true(inputs)
        loss_true = criterion(outputs_true, labels)
        loss_true.backward()
        # NOTE(review): only optimizer_true is stepped, so the shared features
        # are never updated by the true loss — confirm this is intended.
        optimizer_true.step()

        # Update the schedulers (once per batch)
        scheduler_shared.step()
        scheduler_puzzle.step()
        scheduler_true.step()

        running_loss.append(loss.item())
        running_loss_true.append(loss_true.item())

        # Print statistics
        if (i + 1) % log_interval == 0:
            print('%d, [%d, %d] loss puzzle: %.4f\tlr: %.6f\tloss true: %.6f' %
                  (epoch + 1, i + 1, len(trainloader), np.mean(running_loss), optimizer_true.param_groups[-1]['lr'],np.mean(running_loss_true)))
            running_loss = []
            running_loss_true = []

    # Validation
    running_loss = []  # List to store validation loss for each batch
    running_loss_true = []
    net.eval()  # Set the model to evaluation mode

    # Restart the buffers at every epoch so that, at the end, they describe
    # one model only and not the predictions of all the epochs mixed together
    all_preds = []
    all_labels = []
    all_preds_true = []
    all_labels_true = []

    # Variables for Evaluation of the model
    correct = 0
    total = 0
    total_true = 0
    correct_true = 0

    # No gradient is needed for the whole validation pass
    with torch.no_grad():
        for i, data in enumerate(valloader):
            # Get inputs and labels from the data loader
            inputs = data["img_name"].to(device)
            labels = data["labels"].to(device).long()

            shuffled_pieces, targets = net._preprocess(inputs)
            shuffled_pieces, targets = shuffled_pieces.to(device), targets.to(device)

            # Puzzle task: class with the highest score for each piece
            outputs = net.forward_puzzle(shuffled_pieces)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            # Same for the true task
            outputs_true = net.forward_true(inputs)
            _, predicted_true = torch.max(outputs_true, 1)
            all_preds_true.extend(predicted_true.cpu().numpy())
            all_labels_true.extend(labels.cpu().numpy())
            total_true += labels.size(0)
            correct_true += (predicted_true == labels).sum().item()

            # Calculate loss for puzzle
            loss = criterion(outputs, targets)
            running_loss.append(loss.item())

            # Calculate true loss
            loss_true = criterion(outputs_true, labels)
            running_loss_true.append(loss_true.item())

    # Calculate mean of true and puzzle's validation loss
    val_loss = np.mean(running_loss)
    val_loss_true = np.mean(running_loss_true)

    print('Validation loss puzzle: %.6f\tValidation loss true: %.6f' % (val_loss, val_loss_true))
    # Calculate accuracy
    accuracy = 100 * correct / total
    accuracy_true = 100 * correct_true / total_true
    print(f'Validation accuracy puzzle: {accuracy:.2f} %\tValidation accuracy true: {accuracy_true:.2f} %')

    # Early stopping procedure if no improvement in puzzle validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0

        torch.save(net.state_dict(), "net_best.pth")
        print(f"Epoch {epoch+1}: Validation loss puzzle improved to {val_loss:.6f}. Model saved.")
    else:
        epochs_no_improve += 1
        print(f"Epoch {epoch+1}: No improvement in validation loss ({val_loss:.6f}). Epochs without improvement: {epochs_no_improve}/{patience}")
    if epochs_no_improve >= patience:
        print(f"Early stopping triggered after {epoch+1} epochs. Best validation loss: {best_val_loss:.6f}")
        break


print('Finished Training')

