# TP 1
## Groupe des amateurs de vin

### Reconnaissance de vin.

Dans ce projet, nous allons faire de la reconnaissance de vins.

Source des données : https://huggingface.co/datasets/katossky/wine-recognition

In [1]:
import torch
from torch import nn
from torchvision.io import read_image
import numpy as np
import glob
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import copy
from torch.optim import Adam, SGD, Adagrad

### Étape 1 : Charger les données

En PyTorch, les données doivent être transmises au réseau de neurones à l'aide d'un loader. La première étape est de créer une classe de type `Dataset`, dont une instance est passée en argument à `DataLoader`. La classe doit au moins posséder les trois méthodes `__init__`, `__len__` et `__getitem__`.

In [2]:
import pandas as pd

# Load the raw wine table into a DataFrame. The path is relative to the
# notebook's working directory.
wineDataset = pd.read_csv("wine-dataset/wine.csv")

# Display the frame to check the column names and the number of rows.
print(wineDataset)

class WineDataset(Dataset):
    """Torch dataset over a wine table whose first column is the class label.

    Features (every column after the first) are standardised to zero mean and
    unit variance. Labels are shifted down by one so that they can be used as
    class indices by ``CrossEntropyLoss``.

    Args:
        dataset: pandas DataFrame; column 0 is the label, the remaining
            columns are numeric features.
        mean: optional per-feature mean used for standardisation. When
            omitted, the mean of ``dataset`` itself is used.
        std: optional per-feature standard deviation used for standardisation.
            When omitted, the standard deviation of ``dataset`` itself is used.

    Passing the ``mean`` / ``std`` attributes of the training dataset when
    building the validation and test datasets puts every split on the same
    scale instead of standardising each split with its own statistics.
    """

    def __init__(self, dataset, mean=None, std=None):
        self.data = dataset

        raw_labels = self.data.iloc[:, 0].values  # first column is the label
        raw_features = self.data.iloc[:, 1:].values.astype(np.float64)

        # Statistics used for standardisation, exposed so they can be reused
        # for other splits.
        if mean is None:
            mean = raw_features.mean(axis=0)
        if std is None:
            std = raw_features.std(axis=0)
        self.mean = np.asarray(mean, dtype=np.float64)
        std = np.asarray(std, dtype=np.float64)
        # A constant column has a standard deviation of 0; dividing by it would
        # fill the column with NaN. Dividing by 1 leaves it at 0 after centring.
        self.std = np.where(std == 0, 1.0, std)

        self.features = (raw_features - self.mean) / self.std

        # Shift labels down by one so they start at 0.
        # NOTE(review): assumes the labels in the table start at 1 -- confirm
        # against the CSV.
        self.labels = raw_labels - 1

    def __len__(self):
        """Return the number of rows in the wrapped table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float32 / int64 tensors."""
        features = torch.tensor(self.features[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.int64)
        return features, label

# Wrap the full (unsplit) table as a quick sanity check of the class
wine_dataset = WineDataset(wineDataset)

# Print the labels after the shift applied by WineDataset (label - 1)
print(wine_dataset.labels)

# Fetch the first sample: a float32 feature tensor and an int64 label tensor
features, label = wine_dataset[0]
print(f'Sample features: {features}')
print(f'Sample label: {label}')

     label  alcohol  malic acid   ash  alcalinity of ash  magnesium  \
0        1    14.23        1.71  2.43               15.6        127   
1        1    13.20        1.78  2.14               11.2        100   
2        1    13.16        2.36  2.67               18.6        101   
3        1    14.37        1.95  2.50               16.8        113   
4        1    13.24        2.59  2.87               21.0        118   
..     ...      ...         ...   ...                ...        ...   
173      3    13.71        5.65  2.45               20.5         95   
174      3    13.40        3.91  2.48               23.0        102   
175      3    13.27        4.28  2.26               20.0        120   
176      3    13.17        2.59  2.37               20.0        120   
177      3    14.13        4.10  2.74               24.5         96   

     total phenols  flavanoids  nonflavanoid phenols  proanthocyanins  \
0             2.80        3.06                  0.28             2.29   
1

### Étape 2 : Construire le modèle du réseau de neurones

Dans cette partie, nous élaborons notre modèle afin qu'il puisse être entraîné sur notre jeu de données.

In [3]:
# Fully connected classifier: 13 input features -> 64 -> 13 -> 3 outputs
# (one per class), with a ReLU after each hidden layer.
wineModel = nn.Sequential(
    nn.Linear(in_features=13, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=13),
    nn.ReLU(),
    nn.Linear(in_features=13, out_features=3),
)


### Étape 3 : Création des loaders d'entraînement, de validation et de test

Ici on choisit un batch de 20.

Pour une taille de batch de 20, on coupera le jeu de données (préalablement mélangé) de la manière suivante :
- Entraînement : échantillons 1 à 120,
- Validation : échantillons 121 à 160,
- Test : échantillons 161 à la fin.

In [4]:
# Shuffle the rows before slicing them into splits. The seed is fixed so the
# same rows land in the same split on every run; otherwise results from
# different runs (and the held-out test set) are not comparable.
SPLIT_SEED = 42
wineDataset = wineDataset.sample(n=len(wineDataset), random_state=SPLIT_SEED)

# Despite their names, these are DataFrame slices, not file paths.
train_path = wineDataset.iloc[:120]
val_path = wineDataset.iloc[120:160]
test_path = wineDataset.iloc[160:]

# Create dataset objects.
# NOTE(review): WineDataset standardises each split with that split's own
# mean / std, so the three splits are not on exactly the same scale.
train_dataset = WineDataset(train_path)
val_dataset = WineDataset(val_path)
test_dataset = WineDataset(test_path)

# Define batch size
BATCH_SIZE = 20

# Only the training loader reshuffles between epochs; validation and test
# keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Training function

In [5]:
def Print_loss_accuracy(nepoch, tloss, vloss, accuracy, best_tloss, best_vloss, best_accuracy):
    """Print one left-aligned, fixed-width row of the training log.

    The same function prints the header row (when called with column titles)
    and the per-epoch rows (when called with numbers); each argument fills one
    column, in the order of the parameters.
    """
    row_layout = "{:<6} {:<15} {:<17} {:<15} {:<20} {:<22} {:<15}"
    cells = (nepoch, tloss, vloss, accuracy, best_tloss, best_vloss, best_accuracy)
    print(row_layout.format(*cells))

def learning(nepoch, model, crit, optim, batchsize, trainingloader, validationloader, writer):
    """Train ``model`` and evaluate it on the validation set after every epoch.

    For each epoch the function runs one optimisation pass over
    ``trainingloader``, computes loss and accuracy over ``validationloader``,
    logs the three metrics to ``writer`` and prints a table row. Whenever the
    validation accuracy ties or beats the best value so far, the model's
    ``state_dict`` is written to ``best_model.pth`` in the working directory.

    Args:
        nepoch: number of epochs to run.
        model: network to train; updated in place.
        crit: loss function called as ``crit(outputs, labels)``.
        optim: optimizer already bound to ``model``'s parameters.
        batchsize: unused; kept so existing callers keep working.
        trainingloader: loader yielding ``(features, labels)`` training batches.
        validationloader: loader yielding ``(features, labels)`` validation batches.
        writer: object providing ``add_scalar``, ``flush`` and ``close``
            (a TensorBoard ``SummaryWriter`` in this notebook). It is flushed
            and closed before the function returns.

    Returns:
        None.
    """
    # Start from +inf so the first epoch always sets the best loss, whatever
    # the scale of the loss is.
    best_tloss = float("inf")
    best_vloss = float("inf")
    best_accuracy = 0.

    Print_loss_accuracy('Epoch', 'training loss', 'validation loss', 'accuracy', 'best train loss', 'best validation loss', 'best accuracy')

    for epoch in range(nepoch):
        tloss = 0.
        vloss = 0.
        correct = 0

        # ---- training pass ----
        model.train()
        for features, labels in trainingloader:
            optim.zero_grad()
            outputs = model(features)
            loss = crit(outputs, labels)
            loss.backward()
            optim.step()
            # Weight the batch loss by the batch size so the epoch value is a
            # per-sample average even when the last batch is smaller
            # (assumes crit averages over the batch -- confirm for custom losses).
            tloss += loss.item() * features.size(0)
        tloss /= len(trainingloader.dataset)

        # ---- validation pass ----
        model.eval()
        # No gradients are needed here; disabling autograd avoids building a
        # graph for every validation batch.
        with torch.no_grad():
            for features, labels in validationloader:
                predicted = model(features)
                _, predicted_labels = torch.max(predicted, 1)
                correct += (predicted_labels == labels).sum().item()
                loss = crit(predicted, labels)
                vloss += loss.item() * features.size(0)
        vloss /= len(validationloader.dataset)
        accuracy = 100 * correct / len(validationloader.dataset)

        # Log metrics to TensorBoard
        writer.add_scalar('Loss/Train', tloss, epoch)
        writer.add_scalar('Loss/Validation', vloss, epoch)
        writer.add_scalar('Accuracy/Validation', accuracy, epoch)

        # Checkpoint on ties as well, so the most recent of equally accurate
        # epochs is the one kept on disk.
        if accuracy >= best_accuracy:
            torch.save(model.state_dict(), "best_model.pth")
            best_accuracy = accuracy
        best_vloss = min(best_vloss, vloss)
        best_tloss = min(best_tloss, tloss)

        Print_loss_accuracy(epoch + 1,
                            np.round(tloss, 8),
                            np.round(vloss, 8),
                            np.round(accuracy, 8),
                            np.round(best_tloss, 8),
                            np.round(best_vloss, 8),
                            np.round(best_accuracy, 8))

    # Ensure all pending events have been written to disk
    writer.flush()

    # Close the SummaryWriter
    writer.close()


### Test model function

In [6]:
def Testmodel(modelfile, crit, testloader, model=None):
    """Evaluate a saved model on ``testloader`` and report loss and accuracy.

    The previous version was written for image batches (``permute`` /
    ``imshow``) and referenced undefined names, so it could not run on this
    tabular dataset. This version computes the average loss and the
    classification accuracy over the whole loader.

    Args:
        modelfile: path of a file written by ``torch.save``. It may contain
            either a ``state_dict`` (what the training code in this notebook
            saves) or a complete ``nn.Module``.
        crit: loss function called as ``crit(outputs, labels)``.
        testloader: loader yielding ``(features, labels)`` batches.
        model: network with the architecture matching the saved weights.
            Required when ``modelfile`` holds a ``state_dict``; the weights
            are loaded into it in place. Ignored when ``modelfile`` holds a
            complete module.

    Returns:
        Tuple ``(test_loss, accuracy)``: the per-sample average loss and the
        accuracy in percent.

    Raises:
        ValueError: if ``modelfile`` holds a ``state_dict`` and no ``model``
            was given, or if ``testloader`` yields no samples.
    """
    # NOTE(review): recent PyTorch releases default to weights_only=True,
    # which accepts state_dicts but rejects fully pickled modules -- confirm
    # against the installed version if complete modules must be supported.
    loaded = torch.load(modelfile)
    if isinstance(loaded, nn.Module):
        model = loaded
    else:
        if model is None:
            raise ValueError(
                "modelfile contains a state_dict; pass the matching network "
                "through the 'model' argument"
            )
        model.load_state_dict(loaded)

    model.eval()
    total_loss = 0.
    correct = 0
    n_samples = 0
    with torch.no_grad():
        for features, labels in testloader:
            outputs = model(features)
            batch_len = features.size(0)
            # Weight by batch size so a smaller last batch does not skew the
            # average (assumes crit averages over the batch).
            total_loss += crit(outputs, labels).item() * batch_len
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_samples += batch_len

    if n_samples == 0:
        raise ValueError("testloader yielded no samples")

    test_loss = total_loss / n_samples
    accuracy = 100 * correct / n_samples
    print(f"Test loss : {test_loss:.4f} | Test accuracy : {accuracy:.2f} %")
    return test_loss, accuracy

### Étape 4 : Choisir une fonction de coût

In [7]:
# Cross-entropy loss shared by every training run; the optimizer is not
# created here because one is built per run in the training cell.
criterion = nn.CrossEntropyLoss()

### Training & Test

C'est le moment de s'amuser

In [8]:
import shutil
from pathlib import Path

EPOCHS = 20
LEARNING_RATES = [0.001, 0.01]
OPTIMIZERS = {
    "SGD": SGD,
    "Adagrad": Adagrad,
    "Adam": Adam
}

# `learning` writes its best-so-far checkpoint (by validation accuracy) here.
BEST_CHECKPOINT = Path("best_model.pth")
# Per-configuration copies of that checkpoint are collected in this folder;
# create it up front so the copy below cannot fail on a missing directory.
BEST_MODELS_DIR = Path("best_models")
BEST_MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Copy the initial model to ensure the same starting weights for each run
initial_model_state = copy.deepcopy(wineModel.state_dict())

# Loop through configurations and train model
for optimizer_name, optimizer_class in OPTIMIZERS.items():
    for lr in LEARNING_RATES:
        # Reset the model to initial state
        wineModel.load_state_dict(initial_model_state)

        # Create a new instance of the optimizer for each run
        optimizer = optimizer_class(wineModel.parameters(), lr=lr)

        # Create a SummaryWriter with a unique run name
        # (it is flushed and closed by `learning`)
        run_name = f"optimizer={optimizer_name}_lr={lr}_epochs={EPOCHS}"
        writer = SummaryWriter(log_dir=f'runs/{run_name}')

        print(f"Training with optimizer: {optimizer_name}, learning rate: {lr}")

        # Train the model with the current configuration
        learning(EPOCHS, wineModel, criterion, optimizer, BATCH_SIZE, train_loader, val_loader, writer)

        # Keep the best checkpoint of this configuration. After training,
        # wineModel holds the weights of the LAST epoch, so saving its
        # state_dict would not store the best model; copy the checkpoint that
        # `learning` wrote instead. Every run overwrites that checkpoint from
        # its first epoch on, so it always belongs to the current run.
        shutil.copyfile(BEST_CHECKPOINT, BEST_MODELS_DIR / f"best_model_{run_name}.pth")

Training with optimizer: SGD, learning rate: 0.001
Epoch  training loss   validation loss   accuracy        best train loss      best validation loss   best accuracy  
1      1.09701151      1.12452078        32.5            1.09701151           1.12452078             32.5           
2      1.09651037      1.12409145        32.5            1.09651037           1.12409145             32.5           
3      1.09602672      1.12366313        32.5            1.09602672           1.12366313             32.5           
4      1.09551585      1.12323153        32.5            1.09551585           1.12323153             32.5           
5      1.09504086      1.12279898        32.5            1.09504086           1.12279898             32.5           
6      1.09454219      1.12236893        32.5            1.09454219           1.12236893             32.5           
7      1.09407379      1.12193984        32.5            1.09407379           1.12193984             32.5           
8      1.0935