# Supervised Learning part of the project 

In [None]:
import torch  # Import the PyTorch library

# Pick the best available accelerator. Apple-silicon MPS keeps priority (as before),
# an NVIDIA GPU is used when present, and the CPU is the last resort.
if torch.backends.mps.is_available():
    device = torch.device("mps:0")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Device:", device)    # Print the device



In [None]:
# Import the libraries
import glob  
from PIL import Image 
import numpy as np  
import os  
import matplotlib.pyplot as plt  
from torch.utils.data import Dataset, DataLoader 
from torchvision import transforms, utils  
import pandas as pd
import scipy  
import seaborn as sns 


Define a path pattern to search for all jpg images 


In [None]:
image_path_pattern = "/Users/stefanocarotti/Supervised/Project/train_set/*.jpg"

# Find all image file paths matching the pattern.
# glob returns matches in arbitrary (filesystem-dependent) order; sorting makes the
# "first image" shown below and every later iteration over image_paths reproducible.
image_paths = sorted(glob.glob(image_path_pattern))

# Print the number of images found
print("Number of images found:", len(image_paths))
Image.open(image_paths[0])  # Show the first image in the list

# Mean and standard deviation of the images
The results are:
Mean pixel values: [0.63867598 0.54430306 0.44470324]

Standard deviation of pixel values: [0.22658289 0.24506914 0.26728659] 


**Do not re-run this cell:** it iterates over every training image and takes a long time.

In [None]:
# Accumulators for the per-image channel statistics (one entry per RGB channel).
# Each image contributes its own mean and its own standard deviation; both are
# averaged over the images at the end (i.e. the reported std is the mean of the
# per-image stds, not the std of all pixels pooled together).
mean_pixels = np.zeros(3)
std_pixels = np.zeros(3)

# Iterate over all images
for image_path in image_paths:
    # The context manager releases the file handle once the pixels have been read
    with Image.open(image_path) as raw_image:
        # Force three channels before taking statistics: a grayscale image would
        # otherwise produce a scalar that silently broadcasts into all three
        # accumulators, and a four-channel image would raise a shape error.
        # Then resize to the 256x256 resolution used by the model.
        image = raw_image.convert("RGB").resize((256, 256))

    # Height x width x channel array of 8-bit values
    image_np = np.array(image)

    # Reduce over the two spatial axes, leaving one value per channel
    mean_pixels += np.mean(image_np, axis=(0, 1))
    std_pixels += np.std(image_np, axis=(0, 1))

# Average the per-image statistics over the dataset
num_images = len(image_paths)  # Get the total number of images
mean_pixels = mean_pixels / (num_images)  # Compute the mean pixel values
std_pixels = std_pixels / (num_images)  # Compute the standard deviation of pixel values

# Print the mean and standard deviation (still on the 0-255 scale)
print("Mean pixel values:", mean_pixels)
print("Standard deviation of pixel values:", std_pixels)

# Scale mean and std to 0-1, the range produced by transforms.ToTensor
mean_pixels = mean_pixels / 255
std_pixels = std_pixels / 255

# Print the scaled mean and standard deviation
print("Mean pixel values:", mean_pixels)
print("Standard deviation of pixel values:", std_pixels)



# Check if all images are RGB
Since all images are RGB, there is no need to convert them.

**Do not re-run this cell:** it opens every training image and takes a long time.

In [None]:

# Count the images whose colour mode is NOT RGB
non_rgb_count = 0
for image_path in image_paths:
    # The context manager closes each file handle instead of leaving it to the garbage collector
    with Image.open(image_path) as image:
        # Compare the mode itself rather than the number of bands: a band count of 3
        # would also accept other three-band modes such as YCbCr, LAB or HSV.
        if image.mode != "RGB":
            non_rgb_count += 1

# The counter used to be called `rgb_images` although it counts non-RGB images;
# the old name is kept as an alias in case another cell still reads it.
rgb_images = non_rgb_count

# Print the number of non-RGB images found
print("Number of non-RGB images found:", non_rgb_count)
      

# Define a custom dataset class
Then create dataloader for training, validation and testing

In [None]:
class FoodDataset(Dataset):
    """Food image dataset backed by a CSV annotation file.

    The CSV is read with pandas; its first column is used as the image file name
    (relative to ``root_dir``) and its second column as the label. Rows can be
    filtered by label and are then split deterministically into an 80% "train"
    part and a 20% "val" part; any other ``split`` value keeps all rows.

    NOTE(review): ``pd.read_csv`` is called with its default header handling, so
    the first line of the CSV is treated as column names. If the annotation file
    has no header row, its first sample is silently dropped - confirm against the
    actual file.
    """

    def __init__(self, root_dir, filelist, transform, split, exclude_labels=None):
        """
        Args:
            root_dir (string): Directory containing the images.
            filelist (string): CSV file with one (image name, label) row per sample.
            transform (callable or None): Transformation applied to each PIL image.
                ``None`` returns the RGB PIL image unchanged.
            split (string): "train" (first 80% of the shuffled rows), "val"
                (remaining 20%); anything else (e.g. "test") keeps every row.
            exclude_labels (int or iterable of int, optional): Label(s) whose rows
                are removed before splitting. Remaining labels are NOT renumbered.
        """
        self.split = split
        self.root_dir = root_dir  # Set the root directory for the dataset
        self.data = pd.read_csv(filelist, sep=",")  # Load the annotations
        self.transform = transform

        # Exclude labels if specified. A bare scalar is wrapped in a list because
        # Series.isin only accepts list-like arguments.
        if exclude_labels is not None:
            if np.isscalar(exclude_labels):
                exclude_labels = [exclude_labels]
            self.data = self.data[~self.data.iloc[:, 1].isin(list(exclude_labels))]

        total_data_len = int(len(self.data))
        idx = np.arange(total_data_len)
        # A private RandomState seeded with 41 yields the same permutation as
        # np.random.seed(41) + np.random.shuffle, but leaves NumPy's global
        # random state untouched for the rest of the notebook.
        rng = np.random.RandomState(41)
        rng.shuffle(idx)
        print(f"Shuffled indices (first 5): {idx[:5]}")  # Print first 5 shuffled indices

        # Select data based on split. Train and val use the same permutation, so
        # two instances built from the same CSV get disjoint rows.
        split_point = int(total_data_len * 0.8)
        if split == "train":
            self.data = self.data.iloc[idx[:split_point]]  # Use 80% of the data for training
        elif split == "val":
            self.data = self.data.iloc[idx[split_point:]]  # Use 20% of the data for validation
        # Any other split (e.g. "test") keeps all rows in their original order.

    def __len__(self):
        """Return the number of samples left after filtering and splitting."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx (int): Positional index into the rows of this split.

        Returns:
            tuple: ``(sample, img_name)`` where ``sample`` is the dictionary
            ``{'image': transformed image, 'label': label}`` and ``img_name`` is
            the file name read from the CSV.
        """
        img_name, label = self.data.iloc[idx]
        # Read the pixels inside a context manager so the file handle is released
        # immediately, and force three channels so a stray grayscale/CMYK file
        # cannot break the 3-channel normalisation or the network input.
        with Image.open(os.path.join(self.root_dir, img_name)) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        sample = {'image': image, 'label': label}
        return sample, img_name

In [None]:
import pickle

# Preprocessing pipelines. Both resize to the same resolution and standardise each
# channel with the statistics measured on the training images; only the training
# pipeline adds augmentation.
IMAGE_SIZE = (256, 256)
NORM_MEAN = [0.63867598, 0.54430306, 0.44470324]
NORM_STD = [0.22658289, 0.24506914, 0.26728659]

transform_train = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),                      # fixed input resolution
    transforms.RandomHorizontalFlip(p=0.5),             # augmentation: mirror half of the images
    transforms.ToTensor(),                              # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),  # per-channel standardisation
])

transform_val = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),                      # same resolution as training
    transforms.ToTensor(),                              # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),  # same statistics as training
])

# Set batch size
bs = 64

# Locations of the images and of their annotation files
train_image_dir = "/Users/stefanocarotti/Supervised/Project/train_set"
train_info_csv = "/Users/stefanocarotti/Supervised/Project/annot/train_info.csv"
test_image_dir = "/Users/stefanocarotti/Supervised/Project/val_set"
test_info_csv = "/Users/stefanocarotti/Supervised/Project/annot/val_info.csv"

# Create datasets for training, validation, and testing.
# Train and validation are two disjoint parts of the same annotation file; the
# official "val" files are held out and used as the test set.
trainset = FoodDataset(train_image_dir, train_info_csv, transform_train, "train")

valset = FoodDataset(train_image_dir, train_info_csv, transform_val, "val")

testset = FoodDataset(test_image_dir, test_info_csv, transform_val, "test")

# Create data loaders for efficient batch training and evaluation
trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=1, shuffle=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)

# Print dataset length (number of samples).
# Use the datasets themselves: len(trainloader) * bs counts the last, usually
# partial, batch as a full one and therefore over-reports the training size.
print(f"Number of training samples: {len(trainset)}")
print(f"Number of validation samples: {len(valset)}")
print(f"Number of test samples: {len(testset)}")

# visualize one example
sample, name = trainset[0]
print(sample['image'].shape)

# The tensor has been standardised by the final transforms.Normalize step, so its
# values fall outside [0, 1] and imshow would clip them, distorting the colours.
# Undo the normalisation (x * std + mean) before displaying.
normalize_step = transform_train.transforms[-1]
channel_mean = torch.tensor(normalize_step.mean).view(-1, 1, 1)
channel_std = torch.tensor(normalize_step.std).view(-1, 1, 1)
display_image = (sample['image'] * channel_std + channel_mean).clamp(0, 1)

# imshow expects height x width x channel, the tensor is channel x height x width
plt.imshow(display_image.permute(1, 2, 0))
#plt.title(name)
plt.axis("off")
plt.savefig("sample_image.png")
print(name, sample['label'])

## Define the custom model

In [None]:
import torch.nn as nn # Import the neural network module from PyTorch
import torch.nn.functional as F # Import the functional interface module from PyTorch

#Define accuracy function
def accuracy(prediction, y):
    """Return the fraction of entries of ``prediction`` that equal ``y``.

    Args:
        prediction: tensor of predicted class indices.
        y: tensor of reference class indices, same shape as ``prediction``.

    Returns:
        float: number of matching entries divided by ``len(y)``.
    """
    n_correct = torch.eq(prediction, y).sum()
    fraction = n_correct / len(y)
    return fraction.item()


class FoodNet(nn.Module):
    """Small CNN classifier for 3x256x256 images.

    Five convolution + ReLU + 2x2 max-pool stages are followed by a two-layer
    classifier head. For a 256x256 input the last pooling stage produces a
    56 x 5 x 5 feature map, i.e. the 1400 inputs expected by ``fc1``.

    The model owns its optimizer (Adam) and its loss (cross-entropy) so that a
    single call to ``training_step`` performs one full optimisation step.

    Args:
        num_classes (int): Number of output logits. Defaults to 251.
        lr (float): Learning rate of the Adam optimizer. Defaults to 0.0004.
    """

    def __init__(self, num_classes=251, lr=0.0004):
        super().__init__()
        # Convolutional layers (no padding, so each one shrinks the feature map)
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.conv4 = nn.Conv2d(64, 128, 3)
        self.conv5 = nn.Conv2d(128, 56, 3)
        # Fully connected layers; 1400 = 56 channels * 5 * 5 for a 256x256 input
        self.fc1 = nn.Linear(1400, 500)
        self.norm = nn.BatchNorm1d(500)  # Batch Normalization
        self.fc2 = nn.Linear(500, num_classes)

        # Pooling layer, shared by all convolutional stages
        self.pool1 = nn.MaxPool2d(2, 2)
        # Dropout layer
        self.dropout = nn.Dropout(0.5)
        # Optimizer. It holds references to the parameter objects, so it stays
        # valid when the model is later moved with .to(device).
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

        # Loss function (expects raw logits and integer class targets)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Map a batch of images ``(N, 3, 256, 256)`` to logits ``(N, num_classes)``."""
        x = self.pool1(F.relu(self.conv1(x)))   # Apply relu and pooling to the output of conv1
        x = self.pool1(F.relu(self.conv2(x)))
        x = self.pool1(F.relu(self.conv3(x)))
        x = self.pool1(F.relu(self.conv4(x)))
        x = self.pool1(F.relu(self.conv5(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.norm(x)
        x = self.fc2(x)

        return x

    def training_step(self, sample):
        """Run one optimisation step on a batch.

        Args:
            sample (dict): Batch with an ``'image'`` tensor and a ``'label'``
                tensor of integer class indices.

        Returns:
            tuple: ``(loss, accuracy)`` of the batch as Python floats.
        """
        self.optimizer.zero_grad()  # Reset the gradient
        # Send the batch to wherever the model's weights live instead of relying
        # on a notebook-level `device` variable being defined and in sync.
        model_device = next(self.parameters()).device
        image = sample['image'].to(model_device)
        label = sample['label'].to(model_device)
        # Call the module (not .forward directly) so registered hooks are honoured
        prediction = self(image)
        loss = self.criterion(prediction, label)
        loss.backward()
        self.optimizer.step()

        acc = accuracy(torch.argmax(prediction, dim=1), label)  # Calculate the accuracy
        return loss.item(), acc  # Return the loss and accuracy
        


In [None]:
from torchsummary import summary
# Summary of the model (layer output shapes and parameter counts)
model = FoodNet()
# The freshly built model lives on the CPU, while torchsummary defaults to
# device="cuda" and would feed it CUDA tensors whenever CUDA is available.
# State the device explicitly so the dummy input always matches the model.
summary(model, (3, 256, 256), device="cpu")
# It must have fewer than 1M parameters

## Training the model

In [None]:
from tqdm.auto import tqdm
# Import scheduler
from torch.optim.lr_scheduler import StepLR


# Define the evaluation function
def evaluate(model, testloader):
    """Evaluate ``model`` on every batch of ``testloader``.

    Loss and accuracy are averaged per sample, so the result does not depend on
    the batch size or on a smaller final batch.

    Args:
        model: Module exposing a ``criterion`` attribute; the criterion is assumed
            to return the mean loss of a batch (as nn.CrossEntropyLoss does by
            default).
        testloader: Iterable with a length, yielding ``(sample, name)`` pairs where
            ``sample`` holds an ``'image'`` tensor and a ``'label'`` tensor.

    Returns:
        tuple: ``(y_true, y_pred, epoch_loss, epoch_acc)``. ``y_true`` and
        ``y_pred`` are lists with one class index per sample, ``epoch_loss`` is a
        0-dim tensor (callers may use ``.item()`` or ``float()``) and
        ``epoch_acc`` is a Python float in [0, 1].

    Raises:
        ValueError: If ``testloader`` yields no samples.
    """
    model.eval()  # Set the model to evaluation mode
    # Evaluate on the device that holds the weights rather than on a global variable
    model_device = next(model.parameters()).device

    y_true = []
    y_pred = []
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    prog_bar = tqdm(testloader, total=len(testloader))
    with torch.no_grad():  # no gradients are needed for evaluation
        for sample, _ in prog_bar:
            image = sample['image'].to(model_device)
            label = sample['label'].to(model_device)
            prediction = model(image)
            predicted_class = torch.argmax(prediction, dim=1)

            batch_size = label.size(0)
            # Undo the per-batch mean so every sample carries the same weight
            total_loss = total_loss + model.criterion(prediction, label) * batch_size
            total_correct += (predicted_class == label).sum().item()
            total_samples += batch_size

            y_true.extend(label.cpu().numpy())
            y_pred.extend(predicted_class.cpu().numpy())

    if total_samples == 0:
        raise ValueError("evaluate() received a data loader without samples")

    epoch_loss = total_loss / total_samples  # Average loss per sample
    epoch_acc = total_correct / total_samples  # Fraction of correctly classified samples
    return y_true, y_pred, epoch_loss, epoch_acc




model = FoodNet().to(device)  # Create an instance of the FoodNet class
num_epochs = 70  # Upper bound on the number of epochs
# Halve the learning rate every 15 epochs
scheduler = StepLR(model.optimizer, step_size=15, gamma=0.5)

# Per-epoch history of the training metrics
train_loss = []
train_acc = []

# Per-epoch history of the validation metrics
val_loss = []
val_acc = []

# Early stopping: stop once the validation accuracy has not beaten its best value
# for `patience` consecutive epochs. The limit and the running counter are kept
# in separate variables so the limit never changes during training.
patience = 6
epochs_without_improvement = 0
# Start below any reachable accuracy so the first epoch always writes a checkpoint
best_accuracy = float("-inf")

# Iterate over epochs
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}")
    model.train()  # Set the model to training mode
    epoch_loss = 0
    epoch_acc = 0
    prog_bar = tqdm(trainloader, total=len(trainloader))
    for i, (sample, _) in enumerate(prog_bar):  # Iterate over the training data loader
        loss, acc = model.training_step(sample)  # One optimisation step on the batch
        epoch_loss += loss
        epoch_acc += acc
        if i % 20 == 0:
            print(f"Batch {i}, Loss: {loss}, Accuracy: {acc*100}")
    epoch_loss /= len(trainloader)  # Calculate the average loss for the epoch
    epoch_acc /= len(trainloader)  # Calculate the average accuracy for the epoch
    train_loss.append(epoch_loss)  # Append the average loss to the list
    train_acc.append(epoch_acc)  # Append the average accuracy to the list
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss}, Accuracy: {epoch_acc*100}%")

    # Evaluate the model on the validation set
    y_true, y_pred, loss, acc = evaluate(model, valloader)
    val_loss.append(loss)  # Append the validation loss to the list
    val_acc.append(acc)  # Append the validation accuracy to the list

    print(f"epoch: {epoch+1}, Validation Accuracy: {acc*100}%", f"Validation Loss: {loss}")
    scheduler.step()  # Update the learning rate scheduler

    if acc > best_accuracy:
        # New best validation accuracy: checkpoint the weights and reset the counter
        best_accuracy = acc
        epochs_without_improvement = 0
        torch.save(model.state_dict(), "best_simple_model_nosched.pth")
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print("Early stopping")
            break
    
    


    

In [None]:
# Persist the weights the model holds right now (i.e. after the last epoch that ran)
final_weights = model.state_dict()
torch.save(final_weights, "model.pth")

In [None]:
# Load the best checkpoint written by the training cell above. The file name must
# match the one passed to torch.save there ("best_model.pth" is not written by any
# cell of this notebook).
model = FoodNet().to(device)
# map_location remaps the stored tensors to the current device, so a checkpoint
# saved on one accelerator can still be loaded where that accelerator is missing.
best_weights = torch.load("best_simple_model_nosched.pth", map_location=device)
model.load_state_dict(best_weights)

In [None]:
# Convert val_loss to plain floats for plotting.
# float() accepts both 0-dim tensors and numbers that are already floats, so this
# cell can be run more than once (calling .item() in place failed on a second run).
val_loss = [float(loss_value) for loss_value in val_loss]

In [None]:
# Plot the four training / validation curves on a 2x2 grid
plt.figure(figsize=(10, 5))
history_panels = [
    (train_loss, "Training Loss"),
    (train_acc, "Training Accuracy"),
    (val_acc, "Validation Accuracy"),
    (val_loss, "Validation Loss"),
]
# Panels are numbered row by row, starting at 1
for panel_number, (curve, panel_title) in enumerate(history_panels, start=1):
    plt.subplot(2, 2, panel_number)
    plt.plot(curve)
    plt.title(panel_title)


In [None]:
# Save the accuracy curves for the report, one figure (and one file) per curve
accuracy_figures = (
    (train_acc, "Training Accuracy", "train_acc_finalmodel.png"),
    (val_acc, "Validation Accuracy", "val_acc_simple_finalmodel.png"),
)
for curve, figure_title, output_file in accuracy_figures:
    plt.figure()
    plt.plot(curve)
    plt.title(figure_title)
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.savefig(output_file)

## Model evaluation on the test set

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score


# Compute evaluation metrics on test set
y_true, y_pred, loss, acc = evaluate(model, testloader)

# Fix the label set to every class the model can output. Without `labels`, the
# matrix only contains classes that occur in y_true or y_pred, so a missing class
# would shift all following rows and row i would no longer mean class i.
class_ids = np.arange(model.fc2.out_features)
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Support-weighted averages over the classes. zero_division=0 scores a class that
# is never predicted (or never present) as 0, which is also what the default does,
# but without emitting an UndefinedMetricWarning for each such class.
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)

# float() prints the bare number whether evaluate returned a tensor or a float
print(f"Loss: {float(loss)}")
print(f"Test Accuracy: {acc}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

In [None]:
# Plot the confusion matrix
plt.figure()
# Writing the count into every cell is only legible for small matrices; with
# hundreds of classes it creates tens of thousands of overlapping text objects
# and makes rendering extremely slow, so annotate small matrices only.
show_counts = cm.shape[0] <= 30
sns.heatmap(cm, annot=show_counts, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.savefig("confusion_matrix.png")

In [None]:
# Checking the best and worst predicted classes.
# The diagonal of the confusion matrix holds the number of correctly classified
# samples (true positives) of each row; these are raw counts, not rates.
diag = np.diag(cm)
order = np.argsort(diag)  # row indices sorted by ascending true-positive count

n_best = 10    # how many of the best rows to report
n_worst = 35   # how many of the worst rows to report

best_rows = order[-n_best:]
worst_rows = order[:n_worst]

# Rows with the most true positives, followed by their counts
print("Max true positive", best_rows)
print("Max true positive", diag[best_rows])

# Rows with the fewest true positives, followed by their counts. The message now
# states the number actually printed (it used to say 10 while printing 35).
print(f"Lowest {n_worst} true positive", worst_rows)
print(f"Lowest {n_worst} true positive", diag[worst_rows])


## Create a new training set without labels 58 and 116

In [None]:
# Rebuild the three splits with FoodDataset's "exclude_labels" option
labels_to_drop = [58, 116]
train_images = "/Users/stefanocarotti/Supervised/Project/train_set"
train_annotations = "/Users/stefanocarotti/Supervised/Project/annot/train_info.csv"
test_images = "/Users/stefanocarotti/Supervised/Project/val_set"
test_annotations = "/Users/stefanocarotti/Supervised/Project/annot/val_info.csv"

new_trainset = FoodDataset(train_images, train_annotations, transform_train, "train", exclude_labels=labels_to_drop)
new_valset = FoodDataset(train_images, train_annotations, transform_val, "val", exclude_labels=labels_to_drop)
new_testset = FoodDataset(test_images, test_annotations, transform_val, "test", exclude_labels=labels_to_drop)

bs = 64
# Data loaders over the filtered splits. They reuse the names of the earlier
# loaders, so every later cell now iterates over the filtered data.
trainloader = DataLoader(new_trainset, batch_size=bs, shuffle=True)
valloader = DataLoader(new_valset, batch_size=1, shuffle=False)
testloader = DataLoader(new_testset, batch_size=1, shuffle=False)



In [None]:
# Compare the sizes of the filtered splits with the original ones
size_report = [
    ("Number of training samples:", new_trainset),
    ("Number of validation samples:", new_valset),
    ("Number of test samples:", new_testset),
    ("Number of old training samples:", trainset),
    ("Number of old validation samples:", valset),
    ("Number of old test samples:", testset),
]
for caption, dataset in size_report:
    print(caption, len(dataset))


In [None]:
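# Train the model on the filtered data.
# NOTE: FoodDataset only drops the rows of the excluded labels; the remaining labels
# are not renumbered and still range up to 250. Keep the last fully connected layer
# at 251 outputs unless the labels are first remapped to 0-248 (with 249 outputs,
# CrossEntropyLoss would reject every label >= 249).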
from tqdm.auto import tqdm
from torch.optim.lr_scheduler import StepLR


model = FoodNet().to(device)  # Create an instance of the FoodNet class
num_epochs = 70  # Upper bound on the number of epochs
# Halve the learning rate every 15 epochs
scheduler = StepLR(model.optimizer, step_size=15, gamma=0.5)

# Per-epoch history of the training metrics
train_loss = []
train_acc = []

# Per-epoch history of the validation metrics
val_loss = []
val_acc = []

# Early stopping: stop once the validation accuracy has not beaten its best value
# for `patience` consecutive epochs. The limit and the running counter are kept
# in separate variables so the limit never changes during training.
patience = 6
epochs_without_improvement = 0
# Start below any reachable accuracy so the first epoch always writes a checkpoint
best_accuracy = float("-inf")

# Iterate over epochs
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}")
    model.train()  # Set the model to training mode
    epoch_loss = 0
    epoch_acc = 0
    prog_bar = tqdm(trainloader, total=len(trainloader))
    for i, (sample, _) in enumerate(prog_bar):  # Iterate over the training data loader
        loss, acc = model.training_step(sample)  # One optimisation step on the batch
        epoch_loss += loss
        epoch_acc += acc
        if i % 20 == 0:
            print(f"Batch {i}, Loss: {loss}, Accuracy: {acc * 100}")
    epoch_loss /= len(trainloader)  # Calculate the average loss for the epoch
    epoch_acc /= len(trainloader)  # Calculate the average accuracy for the epoch
    train_loss.append(epoch_loss)  # Append the average loss to the list
    train_acc.append(epoch_acc)  # Append the average accuracy to the list
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss}, Accuracy: {epoch_acc * 100}%")

    # Evaluate the model on the validation set
    y_true, y_pred, loss, acc = evaluate(model, valloader)
    val_loss.append(loss)  # Append the validation loss to the list
    val_acc.append(acc)  # Append the validation accuracy to the list

    print(f"epoch: {epoch + 1}, Validation Accuracy: {acc * 100}%", f"Validation Loss: {loss}")
    scheduler.step()  # Update the learning rate scheduler

    if acc > best_accuracy:
        # New best validation accuracy: checkpoint the weights and reset the counter
        best_accuracy = acc
        epochs_without_improvement = 0
        torch.save(model.state_dict(), "best_model_newset.pth")
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print("Early stopping")
            break
    
    


    


