# `Notebook de Model`

Notebook de Mickael MELKOWSKI

## `Config`

### `Import`

In [None]:
import numpy as np
import pandas as pd

import pathlib

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import matplotlib.pyplot as plt
import time
import copy
from random import shuffle

import tqdm.notebook as tqdm

import sklearn
from sklearn.metrics import accuracy_score, cohen_kappa_score
from sklearn.metrics import classification_report
from PIL import Image

# Check for GPU support; as the last expression of the cell, the boolean result is shown as the cell output.
torch.cuda.is_available()

In [None]:
# Configuration
# Paths are relative to the notebook's working directory.
home = pathlib.Path("../")
path_to_data = pathlib.Path("../data")

# Raw dataset location and the processed (masked) dataset location.
data_folder_path = path_to_data / "raw" / "COVID-19_Radiography_Dataset"
output_path = path_to_data / "processed" / "covid_19_masked_tiny_500"
# Class folders handled by the project.
# NOTE(review): torchvision's ImageFolder assigns label indices in sorted folder-name order,
# which is not the order of this list -- confirm before indexing this list with a label.
folder_to_process = ["Lung_Opacity","COVID","Normal","Viral_Pneumonia"]

# Model save path
# (absolute equivalent on the author's machine: "/home/tylio/code/Project_radio_pulmo/code/radio_pulmo/models")
model_save_path = home / "models"

## `PyTorch`

### `DataLoading`

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pathlib
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np

# Configuration: root folder read by ImageFolder / load_dataset below.
# NOTE(review): absolute, user-specific path; it appears to designate the same folder as
# `output_path` from the first config cell -- confirm and consider reusing that variable.
dataset_path = "/home/tylio/code/Project_radio_pulmo/code/radio_pulmo/data/processed/covid_19_masked_tiny_500"



### `Normalization`

In [None]:
# Define the transformations (no normalization yet: this dataset is only used to compute statistics)
transform = transforms.Compose([
    # transforms.Resize((64, 64)),  # Resize the images
    transforms.ToTensor(),  # Convert the images to tensors
])

# Load the data from the folder (one sub-folder per class)
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
#dataset_test = datasets.ImageFolder(root='Testing', transform=transform)

# mean and std for normalization
def get_mean_std(loader, per_channel=False):
    """Compute the mean and standard deviation of all pixels in the dataset.

    The statistics are accumulated from running sums of values and squared
    values, so the result is exact over the whole dataset regardless of how
    the samples are batched.

    Args:
        loader: iterable yielding ``(images, labels)`` pairs where ``images``
            is a 4-D tensor unpacked as (batch, channels, height, width).
        per_channel: when False (default) return one scalar mean/std computed
            over every pixel of every channel; when True return one value per
            channel (1-D tensors of length ``channels``).

    Returns:
        ``(mean, std)`` as float32 tensors; ``std`` is the population
        standard deviation.

    Raises:
        ValueError: if the loader yields no pixels.
    """
    channel_sum = None
    channel_sq_sum = None
    pixels_per_channel = 0
    for images, _ in loader:
        batch_size, num_channels, height, width = images.shape
        # Accumulate in float64 to limit round-off over many batches.
        values = images.to(torch.float64)
        batch_sum = values.sum(dim=(0, 2, 3))
        batch_sq_sum = (values ** 2).sum(dim=(0, 2, 3))
        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum = channel_sum + batch_sum
            channel_sq_sum = channel_sq_sum + batch_sq_sum
        pixels_per_channel += batch_size * height * width

    if pixels_per_channel == 0:
        raise ValueError("get_mean_std: the loader yielded no pixels")

    if per_channel:
        count = pixels_per_channel
        total, total_sq = channel_sum, channel_sq_sum
    else:
        count = pixels_per_channel * channel_sum.numel()
        total, total_sq = channel_sum.sum(), channel_sq_sum.sum()

    mean = total / count
    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative round-off.
    std = (total_sq / count - mean ** 2).clamp(min=0.0).sqrt()

    return mean.to(torch.float32), std.to(torch.float32)

# Loader used only to iterate once over the whole dataset and compute its statistics.
loader = DataLoader(full_dataset, batch_size=32, shuffle=True)

mean, std = get_mean_std(loader)

# Define the transformations, now including normalization with the computed statistics
transform = transforms.Compose([
    # transforms.Resize((64, 64)),  # Resize the images
    # transforms.RandomRotation(10), # Rotate the images by up to 10 degrees
    # transforms.RandomHorizontalFlip(p=0.4), # Random horizontal flip
    transforms.ToTensor(),  # Convert the images to tensors
    transforms.Normalize(mean=mean, std=std)  # normalize
])

# Reload the data with the new transformation
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

### `Normalization and data augmentation`

Test en ajoutant aussi un resize

In [None]:
# Define the transformations (no normalization yet: this dataset is only used to compute statistics)
transform = transforms.Compose([
    # transforms.Resize((64, 64)),  # Resize the images
    transforms.ToTensor(),  # Convert the images to tensors
])

# Load the data from the folder (one sub-folder per class)
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
#dataset_test = datasets.ImageFolder(root='Testing', transform=transform)

# mean and std for normalization
def get_mean_std(loader, per_channel=False):
    """Compute the mean and standard deviation of all pixels in the dataset.

    The statistics are accumulated from running sums of values and squared
    values, so the result is exact over the whole dataset regardless of how
    the samples are batched.

    Args:
        loader: iterable yielding ``(images, labels)`` pairs where ``images``
            is a 4-D tensor unpacked as (batch, channels, height, width).
        per_channel: when False (default) return one scalar mean/std computed
            over every pixel of every channel; when True return one value per
            channel (1-D tensors of length ``channels``).

    Returns:
        ``(mean, std)`` as float32 tensors; ``std`` is the population
        standard deviation.

    Raises:
        ValueError: if the loader yields no pixels.
    """
    channel_sum = None
    channel_sq_sum = None
    pixels_per_channel = 0
    for images, _ in loader:
        batch_size, num_channels, height, width = images.shape
        # Accumulate in float64 to limit round-off over many batches.
        values = images.to(torch.float64)
        batch_sum = values.sum(dim=(0, 2, 3))
        batch_sq_sum = (values ** 2).sum(dim=(0, 2, 3))
        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum = channel_sum + batch_sum
            channel_sq_sum = channel_sq_sum + batch_sq_sum
        pixels_per_channel += batch_size * height * width

    if pixels_per_channel == 0:
        raise ValueError("get_mean_std: the loader yielded no pixels")

    if per_channel:
        count = pixels_per_channel
        total, total_sq = channel_sum, channel_sq_sum
    else:
        count = pixels_per_channel * channel_sum.numel()
        total, total_sq = channel_sum.sum(), channel_sq_sum.sum()

    mean = total / count
    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative round-off.
    std = (total_sq / count - mean ** 2).clamp(min=0.0).sqrt()

    return mean.to(torch.float32), std.to(torch.float32)

# Loader used only to iterate once over the whole dataset and compute its statistics.
loader = DataLoader(full_dataset, batch_size=32, shuffle=True)

mean_nums, std_nums = get_mean_std(loader)

# Define the transformations: random augmentation for training, deterministic pipeline for validation
data_transforms = {
    "train":transforms.Compose([
        transforms.Resize((150,150)), # Resize all images to the same dimensions
        transforms.RandomRotation(10), # Rotate the images by up to 10 degrees
        transforms.RandomHorizontalFlip(p=0.4), # Random horizontal flip with probability 0.4
        transforms.ToTensor(), # Convert to tensors
        transforms.Normalize(mean = mean_nums, std=std_nums)]), # Normalize with the dataset statistics

    "val": transforms.Compose([
        transforms.Resize((150,150)),
        transforms.CenterCrop(150), # Center crop to 150x150 (the image is already 150x150 after the resize above)
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_nums, std = std_nums)
    ])}

# Re-Charger les données avec la nouvelle transformation
def load_split_train_test(datadir, data_transforms, valid_size = .2, batch_size=8, seed=None):
    """Build train/validation DataLoaders over one image folder.

    The folder is opened twice with ``datasets.ImageFolder`` (once per
    transform pipeline) and a single shuffled list of indices is split
    between the two, so the two loaders never share a sample.

    Args:
        datadir: root folder containing one sub-folder per class.
        data_transforms: mapping with a ``'train'`` and a ``'val'`` transform.
        valid_size: fraction of the samples held out for validation, in [0, 1].
        batch_size: batch size used by both loaders (default 8).
        seed: optional integer making the split reproducible; when ``None``
            the global NumPy random state is used.

    Returns:
        ``(trainloader, testloader, dataset_size)`` where ``dataset_size`` is
        ``{"train": n_train, "val": n_val}``.

    Raises:
        ValueError: if ``valid_size`` is outside [0, 1].
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(f"valid_size must be in [0, 1], got {valid_size!r}")

    # Same files in both datasets; only the transform pipeline differs.
    train_data = datasets.ImageFolder(datadir, transform=data_transforms['train'])
    test_data = datasets.ImageFolder(datadir, transform=data_transforms['val'])

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)
    train_idx, test_idx = indices[split:], indices[:split]
    dataset_size = {"train": len(train_idx), "val": len(test_idx)}

    # Each sampler restricts its loader to its own share of the indices.
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)
    trainloader = torch.utils.data.DataLoader(
        train_data, sampler=train_sampler, batch_size=batch_size)
    testloader = torch.utils.data.DataLoader(
        test_data, sampler=test_sampler, batch_size=batch_size)
    return trainloader, testloader, dataset_size

# Build the augmented train loader and the validation loader (20% of the samples held out).
dataloader_train, dataloader_test, dataset_size = load_split_train_test(dataset_path, data_transforms, .2)
dataloaders = {"train":dataloader_train, "val":dataloader_test}
# Number of samples per phase, taken from the length of each loader's sampler.
data_sizes = {x: len(dataloaders[x].sampler) for x in ['train','val']}
# Class names as exposed by the ImageFolder dataset behind the train loader.
class_names = dataloader_train.dataset.classes
print(class_names)

### `Splitting data`

In [None]:
# Split the dataset: 80% train, 20% test --> [0.8, 0.2]
# NOTE(review): the split is random and unseeded here, so it changes on every run.
dataset_train, dataset_test = torch.utils.data.random_split(full_dataset, [0.8, 0.2])

In [None]:
# Create the DataLoader objects (these rebind dataloader_train / dataloader_test);
# only the training loader is shuffled.
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

### `Baseline Model definition from hugging face`

[Hugging face models image classification sort by downloads](https://huggingface.co/models?pipeline_tag=image-classification&sort=downloads)

#### test ResNet-50

[ResNet50_hugging_face](https://huggingface.co/microsoft/resnet-50)

In [None]:
from transformers import AutoImageProcessor, ResNetForImageClassification
# import torchgen

# Load the pretrained ResNet-50 checkpoint from the Hugging Face hub.
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

# Freeze (False) or unfreeze (True) the parameters; here every parameter stays trainable.
for param in model.parameters():
    param.requires_grad = True

# Adaptation to our 4 classes by changing the final layer
num_labels = 4
model.num_labels = num_labels
# model.classifier = torch.nn.Linear(model.classifier.in_features, num_labels)
# Replace the last module of the classifier with a new linear layer that has num_labels outputs.
model.classifier[-1] = torch.nn.Linear(model.classifier[-1].in_features, num_labels)

# NOTE(review): "cuda" is hard-coded; this cell fails on a machine without a GPU.
device = "cuda"
model.to(device)

#### test VGG16

[vgg16.tv_in1k_hugging_face](https://huggingface.co/timm/vgg16.tv_in1k) --> trained on ImageNet 1k

In [None]:
import timm
# Create the VGG16 model with its pretrained weights (rebinds the global `model`).
model = timm.create_model('vgg16.tv_in1k', pretrained=True)

In [None]:
# Inspect the model: output channel count of the third-from-last feature layer, then the head's fc layer.
num_features = model.features[-3].out_channels
print(model.features[-3])
model.head.fc

In [None]:
#print(model.head.fc) --> Linear(in_features=4096, out_features=1000, bias=True)
# Replace the 1000-class head with a 4-output linear layer.
model.head.fc = torch.nn.Linear(4096, 4)

# NOTE(review): "cuda" is hard-coded; this cell fails on a machine without a GPU.
device = "cuda"
model.to(device)

In [None]:
# Freeze (False) or unfreeze (True) the parameters; here every parameter stays trainable.
for param in model.parameters():
    param.requires_grad = True

# Record the number of classes on the model object (the head itself is replaced in the previous cell).
num_labels = 4
model.num_labels = num_labels
# model.classifier = torch.nn.Linear(model.classifier.in_features, num_labels)
# model.classifier[-1] = torch.nn.Linear(model.classifier[-1].in_features, num_labels)

#device = "cuda"
#model.to(device)

### `Baseline Model definition`

In [None]:
import torch.nn as nn
# NOTE(review): "cuda" is hard-coded; this cell fails on a machine without a GPU.
device = "cuda"
# Baseline CNN: three conv/pool stages followed by two linear layers.
# The shape comments below assume 3x256x256 inputs (the size passed to `summary` in the next cell).
model = nn.Sequential(
   nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3), # output 16, 254, 254
   nn.MaxPool2d(kernel_size=2), # output 16, 127, 127
   nn.ReLU(),
    
   nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3), # 32, 125, 125
   nn.ReLU(),
   nn.MaxPool2d(kernel_size=2), # output 32, 62, 62
    
   nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3), # output  64, 60, 60
   nn.ReLU(),
   nn.MaxPool2d(kernel_size=2), # output 64, 30, 30
    
   nn.Flatten(),
   nn.Linear(64 * 30 * 30, 64), # input size is the flattened final feature map --> 64, 30, 30
   nn.ReLU(),
   nn.Linear(64, 4) # one output per class

)
model.to(device)

#### `Summary`

In [None]:
from torchsummary import summary
# Print the layer-by-layer summary for a 3x256x256 input.
summary(model, input_size=(3,256,256), device=device)

#### `Loss function`

In [None]:
# Take one training batch to check that the model and the loss run end to end.
X_batch, y_batch = next(iter(dataloader_train))

# Define the loss function
criterion = nn.CrossEntropyLoss()

y_pred = model(X_batch.to(device))

# Loss of the current model on this single batch (shown as the cell output).
criterion(y_pred, y_batch.to(device))

#### `Fitting`

In [None]:
from torch import optim
from tqdm.notebook import tqdm
epochs = 10

# Define the optimizer (Adam with learning rate 1e-2)
optimizer = optim.Adam(model.parameters(), 1e-2)


for epoch in range(epochs):
    # Training mode: some layers behave differently in this mode
    model.train()
    loss_total = 0
    # Progress bar over the training batches
    progress_bar = tqdm(
            dataloader_train, desc="Epoch {:1d}".format(epoch), leave=True, disable=False
        )
    
    for i, batch in enumerate(progress_bar):
        # Batch of data
        X_batch, y_batch = batch
        
        # Move the batch to the device
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        
        # Reset the gradients to zero
        model.zero_grad()

        # Compute the predictions
        y_pred = model(X_batch.to(torch.float32))

        # Compute the loss
        loss =  criterion(y_pred, y_batch) #torch.mean(torch.abs(y_pred- y_batch.to(torch.float32)))#
        # Backpropagation: compute the gradient of the loss with respect to each layer
        loss.backward()
        
        # Optional gradient-norm clipping (max norm 1.0) for more stability
        #  torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Gradient descent: update the parameters
        optimizer.step()
        
        loss_total += loss.item()
        
        # Display the running mean of the training loss over the batches seen so far
        progress_bar.set_postfix(
            {
                "training_loss": "{:.3f}".format(loss_total/(i+1))}
        )

#### `Evaluation`

In [None]:
from sklearn.metrics import accuracy_score
import numpy as np

def evaluate(dataloader_val):
    """Evaluate the global `model` on `dataloader_val`.

    Uses the global `criterion` and `device`. Returns a dict with the loss
    averaged over the batches (``"loss"``) and the accuracy over all
    samples (``"accuracy"``).
    """
    # Evaluation mode for the whole pass.
    model.eval()
    running_loss = 0
    all_logits = []
    all_targets = []
    for X_batch, y_batch in dataloader_val:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # Forward pass only; no gradients are needed here.
        with torch.no_grad():
            y_pred = model(X_batch.to(torch.float32))
        # The loss is used as a metric and accumulated across batches.
        running_loss += criterion(y_pred, y_batch).item()
        # Keep the raw outputs and the ground truth for the final accuracy.
        all_logits.extend(y_pred.detach().cpu().numpy())
        all_targets.extend(y_batch.cpu().numpy())

    # Mean of the per-batch losses.
    mean_loss = running_loss / len(dataloader_val)
    # Predicted class id = index of the largest output.
    predicted_ids = np.argmax(np.array(all_logits), axis=-1)
    accuracy = accuracy_score(np.array(all_targets), predicted_ids)
    return {"loss":mean_loss, "accuracy":accuracy}


# Evaluate the current model on the test loader and print the results.
# NOTE: this binds the global name `metrics` to the result dict of `evaluate`.
metrics = evaluate(dataloader_test)

print(f"Loss: {metrics['loss']}")
print(f"Accuracy : {metrics['accuracy']}")

#### `Confusion matrix`

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd

on_cuda = True # computed using cuda

y_pred = []
y_true = []

# Inference only: evaluation mode (dropout / batch-norm) and no gradient tracking.
model.eval()
with torch.no_grad():
    # iterate over test data
    for inputs, labels in dataloader_test:
        if on_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        output = model(inputs) # Feed Network

        # Predicted class = index of the largest output
        y_pred.extend(output.argmax(dim=1).cpu().numpy()) # Save Prediction
        y_true.extend(labels.cpu().numpy()) # Save Truth

# Class names in label-index order. ImageFolder numbers the class folders in
# sorted-name order, which is NOT the order of `folder_to_process`.
# (assumes the dataset folder contains exactly the folders of `folder_to_process`)
classes = sorted(folder_to_process)

# Build confusion matrix; `labels` keeps it square even if a class is absent from the test set
cf_matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))
# Row-normalize (fraction of each true class); guard against empty rows
row_totals = np.maximum(cf_matrix.sum(axis=1, keepdims=True), 1)
df_cm = pd.DataFrame(cf_matrix / row_totals,
                     index = classes,
                     columns = classes)
plt.figure(figsize = (6,4))
sn.heatmap(df_cm, annot=True)


## `DenseNet-121 architecture`

Using tuto on kaggle: [covid-19-detection-pytorch-tutorial](https://www.kaggle.com/code/arunrk7/covid-19-detection-pytorch-tutorial)

### `Model creation`

In [None]:
# Class names in label-index order. ImageFolder numbers the class folders in
# sorted-name order, which is NOT the order of `folder_to_process`.
# (assumes the dataset folder contains exactly the folders of `folder_to_process`)
class_names = sorted(folder_to_process)
# Use the GPU when available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

def CNN_Model(pretrained=True):
    """Build a DenseNet-121 whose classifier outputs one logit per class.

    Args:
        pretrained: ``True`` loads the default ImageNet weights, a falsy
            value gives a randomly initialised network; any other value is
            forwarded unchanged as the ``weights`` argument.

    Returns:
        The model, moved to the global ``device``.
    """
    if pretrained is True:
        weights = models.DenseNet121_Weights.DEFAULT
    elif not pretrained:
        weights = None
    else:
        weights = pretrained
    model = models.densenet121(weights=weights)
    # Replace the ImageNet classifier with a new layer sized for our classes (transfer learning).
    num_ftrs = model.classifier.in_features
    model.classifier = nn.Linear(num_ftrs, len(class_names))
    model = model.to(device)
    return model

model = CNN_Model(pretrained=True)


## `Every model Training and evaluation`

La partie ci-dessous contient le code pour l'entraînement et l'évaluation du modèle contenu dans la variable `model`, avec les jeux de données d'entraînement et de validation contenus dans le dict `dataloaders`.

#### `Criterion & Optimizer`

In [None]:
# Loss function: categorical cross-entropy
criterion = nn.CrossEntropyLoss() 

# Optimizer performing the gradient descent (Adam, learning rate 1e-3)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Learning-rate scheduler: multiplies the LR by 0.1 every 7 calls to scheduler.step()
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

#### `Training function`

In [None]:
# Loaders used for the training and validation phases.
dataloaders = {"train":dataloader_train,
               "val":dataloader_test}
# Number of samples per phase, taken from the length of each loader's sampler.
data_sizes = {x: len(dataloaders[x].sampler) for x in ['train','val']}

# Per-epoch history, filled by train_model and plotted later in the notebook.
metrics = {
    "acc":[],
    "val_acc":[],
    "loss":[],
    "val_loss":[],
}

def train_model(model, criterion, optimizer, scheduler, dataloaders, num_epochs=10):
    """Train `model` and return it loaded with the best validation weights.

    Each epoch runs a 'train' phase then a 'val' phase. Per-epoch loss and
    accuracy are appended to the global `metrics` dict: training values under
    "acc"/"loss", validation values under "val_acc"/"val_loss". The weights
    giving the lowest validation loss are kept and restored at the end.

    Args:
        model: network to train; batches are moved to the global `device`.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        dataloaders: mapping with a 'train' and a 'val' iterable of batches.
        num_epochs: number of epochs to run.

    Returns:
        `model`, with the best validation-loss weights loaded.

    Raises:
        ValueError: if a phase's dataloader yields no samples.
    """
    # Imported locally under a private alias: another cell of the notebook
    # rebinds the global name `tqdm`, which breaks a `tqdm.tqdm(...)` lookup.
    from tqdm.notebook import tqdm as progress_bar

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train() # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            current_loss = 0.0
            current_corrects = 0
            samples_seen = 0
            # Validation predictions / labels of the whole epoch, for the kappa score
            epoch_preds, epoch_labels = [], []

            for inputs, labels in progress_bar(dataloaders[phase], desc=phase, leave=False):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Clear the gradients accumulated by the previous batch
                optimizer.zero_grad()

                # Gradients are only tracked during the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Sample-weighted statistics (the last batch may be smaller)
                batch_size = inputs.size(0)
                current_loss += loss.item() * batch_size
                current_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_size
                if not is_train:
                    epoch_preds.extend(preds.cpu().tolist())
                    epoch_labels.extend(labels.cpu().tolist())

            if samples_seen == 0:
                raise ValueError(f"dataloaders[{phase!r}] yielded no samples")

            # The scheduler counts epochs, so it advances once per training epoch
            # (after the optimizer steps), not once per batch.
            if is_train:
                scheduler.step()

            epoch_loss = current_loss / samples_seen
            epoch_acc = current_corrects / samples_seen

            if is_train:
                print(f'{phase} Loss: {epoch_loss:.4f} | {phase} Accuracy: {epoch_acc:.4f}')
                metrics["acc"].append(float(epoch_acc))
                metrics["loss"].append(float(epoch_loss))
            else:
                # Kappa over the whole validation set: a per-batch average is
                # noisy and undefined when a batch contains a single class.
                epoch_kappa = cohen_kappa_score(epoch_preds, epoch_labels)
                print(f'{phase} Loss: {epoch_loss:.4f} | {phase} Accuracy: {epoch_acc:.4f} | Kappa Score: {epoch_kappa:.4f}')
                metrics["val_acc"].append(float(epoch_acc))
                metrics["val_loss"].append(float(epoch_loss))

            # Keep a copy of the weights with the lowest validation loss so far
            if phase == 'val' and epoch_loss < best_loss:
                print('Val loss Decreased from {:.4f} to {:.4f} \nSaving Weights... '.format(best_loss, epoch_loss))
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_since = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_since // 60, time_since % 60))
    print('Best val loss: {:.4f}'.format(best_loss))

    # Now we'll load in the best model weights and return it
    model.load_state_dict(best_model_wts)
    return model

#### `Visual functions`

In [None]:
# config
# Class names in label-index order. ImageFolder numbers the class folders in
# sorted-name order, which is NOT the order of `folder_to_process`.
# (assumes the dataset folder contains exactly the folders of `folder_to_process`)
class_names = sorted(folder_to_process)

# Normalization statistics computed on this dataset by get_mean_std (not ImageNet values),
# repeated for the three image channels.
mean_nums = [mean, mean, mean]
std_nums = [std, std, std]

def imshow(inp, size =(30,30), title=None, normalized=True):
    """Show an image tensor with matplotlib.

    `inp` is moved from channel-first to channel-last layout for plotting.
    When `normalized` is true the normalization is undone with the global
    `mean_nums` / `std_nums` and the values are clipped to [0, 1].
    `size` is the figure size and `title` an optional figure title.
    """
    image = inp.numpy().transpose((1, 2, 0))

    if normalized:
        # Invert (x - mean) / std, then clamp to the displayable range.
        image = np.clip(std_nums * image + mean_nums, 0, 1)

    plt.figure(figsize=size)
    plt.imshow(image)
    if title is not None:
        plt.title(title, size=30)
    # Short pause so that the plot gets refreshed.
    plt.pause(0.001)


# Get one batch of training data (images and their label indices)
inputs, classes = next(iter(dataloaders['train']))

# Assemble the images of the batch into a single grid image
out = torchvision.utils.make_grid(inputs)

# Display the grid, titled with the class name of each image
imshow(out, title=[class_names[x] for x in classes])

In [None]:
def visualize_model(model, num_images=6):
    """Display validation images with their actual and predicted classes.

    Batches are read from the global ``dataloaders['val']`` until
    `num_images` images have been shown. The model is switched to evaluation
    mode for the predictions and its previous training mode is always
    restored, even if an error occurs.

    Args:
        model: network used for the predictions (inputs go to the global `device`).
        num_images: number of images to display; nothing is shown when < 1.
    """
    if num_images < 1:
        return

    was_training = model.training
    model.eval()
    images_handeled = 0
    # Two columns; round the row count up so an odd `num_images` still fits the grid.
    n_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_handeled += 1
                    ax = plt.subplot(n_rows, 2, images_handeled)
                    ax.axis('off')
                    ax.set_title('Actual: {} predicted: {}'.format(
                        class_names[labels[j].item()], class_names[preds[j].item()]))
                    imshow(inputs.cpu().data[j], (5,5))

                    if images_handeled == num_images:
                        return
    finally:
        # Put the model back in the mode it was in when the function was called.
        model.train(mode=was_training)

### `Training`

In [None]:
# Train the model held in `model` for nb_epochs epochs; train_model returns it with the best weights loaded.
nb_epochs = 10
base_model = train_model(model, criterion, optimizer, exp_lr_scheduler, dataloaders, num_epochs=nb_epochs)

### `Saving`

In [None]:
# Save only the weights (state_dict) of the model.
# NOTE(review): the target folder under model_save_path must already exist -- confirm.
torch.save(model.state_dict(), model_save_path / "pytorch/dense121_normalized.pt")

### `Loading`

See: [PyTorch saving_loading_models](https://pytorch.org/tutorials/beginner/saving_loading_models.html)

In [None]:
# Rebuild the architecture, then load the saved weights into it.
model = CNN_Model(pretrained=True)
model.load_state_dict(torch.load(model_save_path / "pytorch/dense121_normalized.pt", weights_only=True))
# Switch to evaluation mode for inference.
model.eval()

### `Eval and visualisation`

In [None]:
# Show validation images with their actual / predicted classes for the trained model.
visualize_model(base_model)
plt.show()

### `Accuracy and loss`

In [None]:
# Print the per-epoch values stored under "acc" in the metrics history.
print(metrics["acc"])

In [None]:
# Plot the history stored in the `metrics` dict filled by the train function.
# NOTE: nb_epochs must equal the number of values stored in each list of `metrics`.

nb_epochs = 10
plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
# Accuracy curves of the DenseNet-121 network
plt.plot(np.arange(1 , nb_epochs + 1, 1),
         metrics["acc"], 
         label = 'acc Dense121',
         color = 'blue')
plt.plot(np.arange(1 , nb_epochs + 1, 1),
         metrics["val_acc"],
         label = 'val_acc Dense121',
         color = 'red')
# Axis labels
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
# Show the legend
plt.legend()

plt.subplot(1,2,2)
# Loss curves of the DenseNet-121 network
plt.plot(np.arange(1 , nb_epochs + 1, 1),
         metrics["loss"], 
         label = 'loss Dense121',
         color = 'blue')
plt.plot(np.arange(1 , nb_epochs + 1, 1),
         metrics["val_loss"],
         label = 'val_loss Dense121',
         color = 'red')

# Axis labels
plt.xlabel('Epochs')
plt.ylabel('Loss')
# Show the legend
plt.legend()
# Display the figure
plt.show()

### `Confusion Matrix`

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

on_cuda = True # computed using cuda

y_pred = []
y_true = []

# Inference only: evaluation mode (dropout / batch-norm) and no gradient tracking.
model.eval()
with torch.no_grad():
    # iterate over test data
    for inputs, labels in dataloader_test:
        if on_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        output = model(inputs) # Feed Network

        # Predicted class = index of the largest output
        y_pred.extend(output.argmax(dim=1).cpu().numpy()) # Save Prediction
        y_true.extend(labels.cpu().numpy()) # Save Truth

# Class names in label-index order. ImageFolder numbers the class folders in
# sorted-name order, which is NOT the order of `folder_to_process`.
# (assumes the dataset folder contains exactly the folders of `folder_to_process`)
classes = sorted(folder_to_process)

# Build confusion matrix; `labels` keeps it square even if a class is absent from the test set
cf_matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))
# Row-normalize to percentages of each true class; guard against empty rows
row_totals = np.maximum(cf_matrix.sum(axis=1, keepdims=True), 1)
df_cm = pd.DataFrame(100*cf_matrix / row_totals,
                     index = classes,
                     columns = classes)
plt.figure(figsize = (6,4))
sn.heatmap(df_cm, annot=True)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('DenseNet121 Confusion Matrix')

## `Hugging Face model Training and evaluation`

### Loading data for hugging face

In [None]:
from transformers import AutoImageProcessor
from datasets import load_dataset

# Load the dataset from the image folder (rebinds nothing from the PyTorch section except via later cells)
#dataset_path = "/home/processed/covid_19_masked_tiny_500"
dataset = load_dataset("imagefolder", data_dir=dataset_path)

# Initialize the image processor shipped with the ResNet-50 checkpoint (resizing and normalization)
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")

# Define a preprocessing function
def preprocess(batch):
    """Add a "pixel_values" entry to `batch`, computed from its "image" entry.

    Every image that is not already in RGB mode is converted to RGB, then
    the whole list goes through the global `processor`. The batch is
    modified in place and returned.
    """
    rgb_images = [
        img if img.mode == "RGB" else img.convert("RGB")
        for img in batch["image"]
    ]

    # Let the processor do the resizing and the normalization.
    processed = processor(images=rgb_images, size=256)
    batch["pixel_values"] = processed["pixel_values"]
    return batch

# Apply the preprocessing to the whole dataset, batch by batch
dataset = dataset.map(preprocess, batched=True)

# Check
# print(dataset_train[0]["image"])

# Split the dataset into 80% train and 20% test
split_dataset = dataset["train"].train_test_split(test_size=0.2)

# Access the train and test splits
# NOTE: these rebind dataset_train / dataset_test, which earlier cells bind to the PyTorch splits.
dataset_train = split_dataset["train"]
dataset_test = split_dataset["test"]

### Training hyperparameters

Using [transformers/training tutorial](https://huggingface.co/docs/transformers/training)

Create a [TrainingArguments](https://huggingface.co/docs/transformers/v4.46.0/en/main_classes/trainer#transformers.TrainingArguments) class which contains all the hyperparameters you can tune as well as flags for activating different training options.

Use the `eval_strategy` parameter in your training arguments to report the evaluation metric at the end of each epoch.

In [None]:
from transformers import TrainingArguments, Trainer
# Training hyperparameters: evaluate at the end of every epoch, batches of 16, log every 10 steps.
training_args = TrainingArguments(
    output_dir="test_trainer",
    eval_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir="./logs",
    logging_steps=10,
    )

# Alternative arguments kept for reference only (plain string, not passed to TrainingArguments).
backup_params = """
    output_dir="./results",
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
"""

### Evaluate

[Trainer](https://huggingface.co/docs/transformers/v4.46.0/en/main_classes/trainer#transformers.Trainer) does not automatically evaluate model performance during training. You’ll need to pass Trainer a function to compute and report metrics. The [🤗 Evaluate](https://huggingface.co/docs/evaluate/index) library provides a simple [accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy) function you can load with the evaluate.load (see this [quicktour](https://huggingface.co/docs/evaluate/a_quick_tour) for more information) function:

In [None]:
import numpy as np
import evaluate

# Load the accuracy metric used by compute_metrics below.
# NOTE: the name `evaluate` refers to the imported module here and shadows the `evaluate` function defined earlier in the notebook.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute the global `metric` from a (logits, labels) pair.

    The predicted class of each sample is the index of its largest logit.
    """
    raw_scores, references = eval_pred
    predicted_ids = np.argmax(raw_scores, axis=-1)
    return metric.compute(predictions=predicted_ids, references=references)

### Trainer object

Create a [Trainer](https://huggingface.co/docs/transformers/v4.46.0/en/main_classes/trainer#transformers.Trainer) object with your model, training arguments, training and test datasets, and evaluation function:

In [None]:
# Build the Trainer from the current global `model`, the arguments and the two splits.
# NOTE(review): `model` is whichever model was defined last in the notebook -- confirm it is the Hugging Face one.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    compute_metrics=compute_metrics,
)

### Fine-tuning

In [None]:
# Run the fine-tuning.
trainer.train()

### Confusion matrix

In [None]:
# Run the model on the test split; the result's predictions and label ids are used in the next cell.
predictions = trainer.predict(dataset_test)

Recup max prediction

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Step 1: Get the predicted class (index of the largest score) and the true labels
preds = np.argmax(predictions.predictions, axis=1)
labels = predictions.label_ids

# Step 2: Compute the confusion matrix, labelled with the class names of the dataset's "label" feature
cm = confusion_matrix(labels, preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=dataset_test.features["label"].names)

# Step 3: Plot the confusion matrix (raw counts)
fig, ax = plt.subplots(figsize=(4,4))
disp.plot(ax=ax, cmap="Blues", values_format="d")
plt.title("Confusion Matrix")
plt.show()