In [None]:
# TODO : Faire les titres des sections avec un plan défini
# TODO : Faire un sommaire en MD
# TODO : Ajouter les commentaires
# TODO : Ajouter la routine de data augmentation à tous les modèles (tester directement l'intégration du transform de mamba sur les autres modèles)
# TODO : Faire meilleure routine de data augmentation ?
# TODO : Faire pointer les runs de tensorboard sur un répertoire racine qui sera partagé par les 2 notebooks
# TODO : Effacer les expériences et aussi les artifacts en dehors de efficientnet

In [None]:
import os
import warnings
from time import time
from datetime import datetime

import pandas as pd
import numpy as np
from PIL import Image

import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
from timm.data.transforms_factory import create_transform

from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoModel

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

from constants import ROOT_FOLDER, SEED, VAL_SIZE, TEST_SIZE, BATCH_SIZE, SAMPLING, INPUT_RESOLUTION, MAMBA_HIDDEN_SIZES


In [9]:
# Silence FutureWarning messages so they do not clutter the cell outputs
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Report the torch, CUDA and cuDNN versions
print("Torch version: ", torch.__version__)
print("Cuda version: ", torch.version.cuda)
print("CUDNN version: ", torch.backends.cudnn.version())

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")

Torch version:  2.6.0+cu124
Cuda version:  12.4
CUDNN version:  90100
Using device: cuda


In [10]:
# Directory that holds the image files referenced by the dataset
IMAGE_FOLDER = ROOT_FOLDER / "data" / "images"
# One image file located inside IMAGE_FOLDER
IMAGE_TEST = IMAGE_FOLDER / "2aaa6083689193df5ab01fe37dea1b5e.jpg"
# Directory for model artifacts: described as holding the former H5 EfficientNet
# weights; the training/testing routines of this notebook also write and read
# their `.pth` checkpoints here
ARTIFACTS_FOLDER = ROOT_FOLDER / "artifacts"
# Pickled dataset with the labels, image file names and metadata
DATASET_PATH = ROOT_FOLDER / "data" / "dataset_cleaned.pkl"

In [11]:
# Read the cleaned dataset of the previous project as a pandas DataFrame and
# discard the two text columns, which this notebook does not use.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle(DATASET_PATH)
df = df.drop(columns=['product_name', 'description'])
print(f"Dataset shape: {df.shape}")
print(f"Dataset columns: {df.columns}")

# Fit a LabelEncoder on the class names and keep the ordered list of names
le = LabelEncoder().fit(df["class"])
classes = le.classes_.tolist()
n_classes = len(classes)
print(f"Number of classes: {n_classes}")
print(f"Classes: {classes}")

# Replace the class names by their integer codes
df["class"] = le.transform(df["class"])

Dataset shape: (1050, 2)
Dataset columns: Index(['image', 'class'], dtype='object')
Number of classes: 7
Classes: ['Baby Care', 'Beauty and Personal Care', 'Computers', 'Home Decor & Festive Needs', 'Home Furnishing', 'Kitchen & Dining', 'Watches']


In [12]:
# Hold out the test set first, then carve the validation set out of the remainder.
# Both splits are stratified on the label and seeded with SEED.
X_temp, X_test, y_temp, y_test = train_test_split(
    df['image'], df['class'],
    test_size=TEST_SIZE, random_state=SEED, stratify=df['class'], shuffle=True,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp,
    test_size=VAL_SIZE, random_state=SEED, stratify=y_temp, shuffle=True,
)


def _build_split(images, labels):
    """
    Join the image column and the label column of one split into a DataFrame.

    When SAMPLING is set, the split is reduced to at most SAMPLING rows. The
    draw is seeded with SEED so that re-running the cell yields the same rows,
    and the sample size is capped at the split size so that a small split does
    not raise.
    """
    frame = pd.concat([images, labels], axis=1)
    if SAMPLING:
        frame = frame.sample(n=min(SAMPLING, len(frame)), random_state=SEED)
    return frame


# Assemble one (image, class) DataFrame per split
train = _build_split(X_train, y_train)
val = _build_split(X_val, y_val)
test = _build_split(X_test, y_test)

# Print the shape of each set
print(f"Train shape: {train.shape}")
print(f"Val shape: {val.shape}")
print(f"Test shape: {test.shape}")

Train shape: (758, 2)
Val shape: (134, 2)
Test shape: (158, 2)


In [13]:
# Preview the first five rows of the training split
train.head(5)

Unnamed: 0,image,class
229,caabe6014b914fe2874a9a8d7284f79b.jpg,3
450,95feec21a9d076cff084159d61bf9b8e.jpg,0
798,9993de7e2bcced43dc9edb3b2c81f23d.jpg,1
230,968a2b3be84193e3f755c2fe71033a2c.jpg,3
293,c2efa8aa11898bdb5fc4e46201973a42.jpg,0


In [14]:
# Preview the first five rows of the validation split
val.head(5)

Unnamed: 0,image,class
979,c44a5dc5b5ebe5b3e0535b7c2b7921e4.jpg,0
49,02a53d335775b652f22f41b529b9d646.jpg,1
567,97fba8a02361aa56eaa9fa51bc1d7661.jpg,6
494,a124d6e4c30b00918c594289266a383c.jpg,6
773,109e235d4838002246599f987d935c21.jpg,0


In [15]:
# Preview the first five rows of the test split
test.head(5)

Unnamed: 0,image,class
11,08452abdadb3db1e686b94a9c52fc7b6.jpg,6
548,2541b59d54a3a9f2681c0049f7ddd85c.jpg,6
696,82fbc93cd45ab747e7e606f2c52c7335.jpg,3
238,2e8df36b35d22cf219cf8bae6c2af752.jpg,5
963,bcb51cec3d290e6a661586d0df30e091.jpg,4


# **CLASSES AND FUNCTIONS**

In [16]:
# Block DataLoader
class ImageDataset(Dataset):
    """
    Map-style dataset that reads images from disk and pairs them with labels.

    Each item is an ``(image, label)`` tuple. The image is opened with PIL,
    converted to RGB, then preprocessed by ``processor`` when one is given,
    otherwise by ``transform`` when one is given. The label is returned as a
    ``torch`` tensor.
    """

    def __init__(self, dataframe, image_dir, processor=None, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame whose first column holds the image
                file names and whose second column holds the labels.
            image_dir (str): Directory where images are stored.
            processor (AutoImageProcessor, optional): Hugging Face processor for image
                preprocessing. Takes precedence over ``transform``. Defaults to None.
            transform (callable, optional): Transform applied to the PIL image when no
                processor is given. Defaults to None.
        """
        super().__init__()
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.processor = processor
        self.transform = transform

    def __len__(self):
        """Return the number of rows of the underlying DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """
        Load and preprocess the sample at position ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``label`` is a ``torch`` tensor. When
            neither a processor nor a transform is set, ``image`` is the raw RGB
            PIL image.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Positional access: column 0 is the file name, column 1 is the label
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')  # Ensure consistent color format

        label = self.dataframe.iloc[idx, 1]

        if self.processor is not None:
            inputs = self.processor(images=image, return_tensors="pt")
            # Drop only the leading batch axis added by the processor; a bare
            # squeeze() would also remove any other axis of size 1
            image = inputs['pixel_values'].squeeze(0)
        elif self.transform is not None:
            image = self.transform(image)

        # Convert label to tensor
        label = torch.tensor(label)

        return image, label

In [17]:
class MambaClassifier(nn.Module):
    def __init__(
        self,
        backbone: AutoModel,
        num_classes: int,
        hidden_dim: int , # The hidden dimension of the backbone is stored in the MAMBA_HIDDEN_SIZES dict with the model card as the key
        fc_layer: int = None # Add the int number of layers before the classifier
        ):
        super().__init__()
        self.backbone = backbone
        self.config = self.backbone.config
        if fc_layer:
            self.fc_layers = nn.ModuleList()
            for i in range(fc_layer):
                if i == 0:
                    self.fc_layers.append(nn.Linear(hidden_dim, hidden_dim))
                else:
                    self.fc_layers.append(nn.Linear(hidden_dim, hidden_dim))
                self.fc_layers.append(nn.ReLU())
                self.fc_layers.append(nn.Dropout(0.1))
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        out_avg_pool, _ = self.backbone(x)
        if hasattr(self, 'fc_layers'):
            for layer in self.fc_layers:
                out_avg_pool = layer(out_avg_pool)
        logits = self.classifier(out_avg_pool)
        return logits

In [18]:
def classification_report_to_markdown(y_true, y_pred, target_names):
    """
    Render the scikit-learn classification report as a markdown table.

    The report is computed as a dictionary (with ``zero_division=0``), turned
    into a DataFrame with one row per report entry, stripped of its ``support``
    column, and its index is exposed as a ``Classes`` column.
    """
    metrics = classification_report(
        y_true,
        y_pred,
        target_names=target_names,
        output_dict=True,
        zero_division=0,
    )
    table = (
        pd.DataFrame(metrics)
        .transpose()
        .drop(columns=['support'])
        .rename_axis('Classes')
        .reset_index()
    )
    return table.to_markdown(index=False)

In [19]:
def test_model(
    model: AutoModelForImageClassification|MambaClassifier,
    experiment_id: str,
    test_loader: DataLoader,
    criterion: nn.Module,
    writer: SummaryWriter = None
    ):
    """
    Evaluate the checkpoint saved for ``experiment_id`` on the test set.

    The weights stored in ``ARTIFACTS_FOLDER / f"{experiment_id}.pth"`` are loaded
    into ``model``, which is then run over ``test_loader`` without gradients. The
    test loss and accuracy are printed; when ``writer`` is given, the cumulative
    step timing, the accuracy, the classification report and the confusion matrix
    are also logged to TensorBoard.

    Args:
        model: Model instance whose architecture matches the saved checkpoint.
        experiment_id: Identifier used as the checkpoint file name (without extension).
        test_loader: DataLoader yielding ``(inputs, labels)`` batches.
        criterion: Loss function applied to ``(logits, labels)``.
        writer: Optional TensorBoard writer. Nothing is logged when None.

    Raises:
        ValueError: If ``test_loader`` yields no sample.
    """
    # Load the best model; map_location makes a checkpoint saved on another device
    # (e.g. a GPU) loadable on the current one
    model.load_state_dict(
        torch.load(ARTIFACTS_FOLDER / f'{experiment_id}.pth', map_location=DEVICE)
    )
    model.to(DEVICE)
    model.eval()

    # MambaClassifier returns the logits directly; other models return an object
    # exposing them through `.logits`
    mamba = "MambaClassifier" in type(model).__name__

    # Initialize the test metrics
    test_loss, correct_test, total_test = .0, 0, 0
    running_test_steps = 0
    running_test_time_by_step = .0
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in test_loader:
            step_time = time()
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = model(inputs).logits if not mamba else model(inputs)
            # Weight the batch loss by the batch size so that the final division
            # by the sample count gives a per-sample average
            test_loss += criterion(outputs, labels).item() * inputs.size(0)
            preds = outputs.argmax(dim=1)
            correct_test += (preds == labels).sum().item()
            total_test += inputs.size(0)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            # Cumulative time spent in the test steps so far
            running_test_time_by_step += time() - step_time
            running_test_steps += 1
            if writer:
                writer.add_scalar('TimingByStep/test', running_test_time_by_step, running_test_steps)

    if total_test == 0:
        raise ValueError("test_loader did not yield any sample; test metrics cannot be computed.")

    epoch_test_loss = test_loss / total_test
    epoch_test_acc  = correct_test / total_test
    if writer:
        writer.add_scalar('Accuracy/test', epoch_test_acc, 0)

    print(f"Test Loss: {epoch_test_loss:.4f}, Test Acc: {epoch_test_acc:.4f}")

    # Build the classification report and the row-normalized confusion matrix
    classification_report_md = classification_report_to_markdown(y_true, y_pred, target_names=classes)
    cm = ConfusionMatrixDisplay.from_predictions(
        y_true, y_pred,
        labels=range(n_classes),
        normalize='true',
        display_labels=classes,
        xticks_rotation="vertical",
        cmap=plt.cm.Blues
        )
    # Save the classification report and the confusion matrix in tensorboard
    if writer:
        writer.add_text('ClassificationReport/test', classification_report_md, 0)
        writer.add_figure('ConfusionMatrix/test', cm.figure_, 0)
    # Drop the local references to the model and to the last batch, then release the
    # cached GPU memory. The model itself stays alive as long as the caller holds it.
    del model, inputs, labels, outputs
    torch.cuda.empty_cache()

In [20]:
def train_and_eval_model(
    model: AutoModelForImageClassification|MambaClassifier,
    model_card: str,
    train_loader: DataLoader,
    val_loader: DataLoader,
    test_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    criterion: nn.Module,
    num_epochs: int,):
    """
    Train ``model``, validate it after every epoch, then test the best checkpoint.

    For each epoch the model is trained on ``train_loader`` and evaluated on
    ``val_loader``. Whenever the validation accuracy improves, the weights are
    saved to ``ARTIFACTS_FOLDER / f"{experiment_id}.pth"``. After the last epoch,
    ``test_model`` reloads that checkpoint and evaluates it on ``test_loader``.
    Losses, accuracies and cumulative timings are logged to TensorBoard.

    Args:
        model: Model to fine-tune. A ``MambaClassifier`` returns logits directly;
            any other model is expected to expose them through ``.logits``.
        model_card: Model card name; its last ``/``-separated part names the run.
        train_loader: DataLoader yielding the training ``(inputs, labels)`` batches.
        val_loader: DataLoader yielding the validation batches.
        test_loader: DataLoader yielding the test batches.
        optimizer: Optimizer already bound to the parameters of ``model``.
        criterion: Loss function applied to ``(logits, labels)``.
        num_epochs: Number of training epochs (at least 1).

    Raises:
        ValueError: If ``num_epochs`` is lower than 1, or if the training or the
            validation loader yields no sample.
    """
    if num_epochs < 1:
        # Without at least one epoch no checkpoint is written and the test step
        # would fail when trying to load it
        raise ValueError("num_epochs must be at least 1.")

    # --- SETUP ---
    model_name = model_card.split("/")[-1]
    experiment_id = "_".join([datetime.now().strftime("%Y%m%d-%H%M%S"), model_name])
    # os.path.basename is used instead of splitting on "/" so that the current
    # directory name is also found with other path separators
    log_dir = ROOT_FOLDER / os.path.basename(os.getcwd()) / "runs" / "_".join([experiment_id, model_name])
    # Make sure the checkpoint directory exists before the first save
    os.makedirs(ARTIFACTS_FOLDER, exist_ok=True)
    # Initialize SummaryWriter
    writer = SummaryWriter(log_dir)

    try:
        # Check if the model is a MambaClassifier instance
        mamba = "MambaClassifier" in type(model).__name__

        # Initialize common metrics
        best_epoch = 0

        # Initialize the training metrics (timings are cumulative over the whole run)
        running_train_time_by_step = .0
        running_train_time_by_epoch = .0
        running_train_steps = 0
        writer.add_scalar('TimingByEpoch/train', running_train_time_by_epoch, 0)

        # Initialize the validation metrics
        best_val_metric = float('-inf')
        running_val_time_by_step = .0
        running_val_time_by_epoch = .0
        running_val_steps = 0
        writer.add_scalar('TimingByEpoch/validation', running_val_time_by_epoch, 0)

        # Move model to the device
        model.to(DEVICE)

        # Training loop
        for epoch in range(num_epochs):
            # --- 1. TRAINING LOOP ---
            model.train()
            train_loss, correct_train, total_train = 0.0, 0, 0
            epoch_time = time()
            for inputs, labels in train_loader:
                step_time = time()
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)
                optimizer.zero_grad()
                # Compute the model outputs given the mamba variable
                outputs = model(inputs).logits if not mamba else model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # Batch loss weighted by the batch size, so that the epoch loss is a
                # per-sample average
                step_loss = loss.item() * inputs.size(0)
                train_loss += step_loss
                preds_train = outputs.argmax(dim=1)
                correct_train += (preds_train == labels).sum().item()
                total_train += inputs.size(0)
                running_train_steps += 1
                running_train_time_by_step += time() - step_time
                writer.add_scalar('TimingByStep/train', running_train_time_by_step, running_train_steps)
                writer.add_scalar('LossByStep/train', step_loss, running_train_steps)

            if total_train == 0:
                raise ValueError("train_loader did not yield any sample.")

            running_train_time_by_epoch += time() - epoch_time
            epoch_train_loss = train_loss / total_train
            epoch_train_acc = correct_train / total_train
            # Log training metrics per epoch
            writer.add_scalar('LossByEpoch/train', epoch_train_loss, epoch)
            writer.add_scalar('AccuracyByEpoch/train', epoch_train_acc, epoch)
            # Cumulative time after (epoch + 1) epochs; step 0 holds the zero baseline
            writer.add_scalar('TimingByEpoch/train', running_train_time_by_epoch, epoch + 1)
            print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f}")

            # --- 2. VALIDATION LOOP ---
            model.eval()
            val_loss, correct_val, total_val = .0, 0, 0
            epoch_time = time()
            with torch.no_grad():
                for inputs, labels in val_loader:
                    # Move images and labels to the device
                    step_time = time()
                    inputs = inputs.to(DEVICE)
                    labels = labels.to(DEVICE)
                    outputs = model(inputs).logits if not mamba else model(inputs)
                    val_step_loss = criterion(outputs, labels).item() * inputs.size(0)
                    val_loss += val_step_loss
                    preds = outputs.argmax(dim=1)
                    correct_val += (preds == labels).sum().item()
                    total_val += inputs.size(0)
                    running_val_steps += 1
                    running_val_time_by_step += time() - step_time
                    writer.add_scalar('TimingByStep/validation', running_val_time_by_step, running_val_steps)
                    # Indexed by the validation step counter: using the training
                    # counter would put every step of an epoch on the same x value
                    writer.add_scalar('LossByStep/validation', val_step_loss, running_val_steps)

            if total_val == 0:
                raise ValueError("val_loader did not yield any sample.")

            running_val_time_by_epoch += time() - epoch_time
            epoch_val_loss = val_loss / total_val
            epoch_val_acc  = correct_val / total_val
            # Log validation metrics per epoch
            writer.add_scalar('Loss/validation', epoch_val_loss, epoch)
            writer.add_scalar('Accuracy/validation', epoch_val_acc, epoch)
            writer.add_scalar('TimingByEpoch/validation', running_val_time_by_epoch, epoch + 1)
            print(f"Epoch [{epoch+1}/{num_epochs}], Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}")

            # --- 3. UPDATE BEST MODEL ---
            # Save the model if the validation accuracy is better than the best one
            if epoch_val_acc > best_val_metric:
                best_val_metric = epoch_val_acc
                best_epoch      = epoch
                torch.save(model.state_dict(), ARTIFACTS_FOLDER / f'{experiment_id}.pth')
                print(f"Best model updated at epoch {best_epoch} with val acc: {best_val_metric:.4f}")

        # Drop the references to the last batch and to the loss graph, then release
        # the cached GPU memory before the test pass
        del inputs, labels, outputs, loss
        torch.cuda.empty_cache()

        # --- 4. TESTING LOOP ---
        test_model(model, experiment_id, test_loader, criterion, writer)
    finally:
        # Flush and close the writer even when training or testing fails
        writer.close()

# EXPERIMENTS

In [None]:
# TODO : Ajouter une card dans le tensorboard pour les paramètres du modèle
# TODO : warning PIL decompression bomb warning
# TODO : Nettoyer les fichiers useless
# TODO : Tester le batch size de 16

## **Google VIT**

In [14]:
# Assign the model card name
model_card = "google/vit-base-patch16-224-in21k"

# Define the model with a classification head sized for the dataset classes.
# trust_remote_code is not passed: this checkpoint is loaded with the ViT
# implementation bundled in transformers, so no repository code has to be executed.
model = AutoModelForImageClassification.from_pretrained(model_card, num_labels=n_classes)

# Define the image processor matching the model card
processor = AutoImageProcessor.from_pretrained(model_card)

# Prepare the dataloaders for training, validation and testing
# (only the training loader is shuffled)
dataset = ImageDataset(dataframe=train, image_dir=IMAGE_FOLDER, processor=processor)
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
dataset = ImageDataset(dataframe=val, image_dir=IMAGE_FOLDER, processor=processor)
val_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
dataset = ImageDataset(dataframe=test, image_dir=IMAGE_FOLDER, processor=processor)
test_loader = DataLoader(dataset, batch_size=BATCH_SIZE)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


In [None]:
# Training hyper-parameters: epoch count, loss function and optimizer
num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Fine-tune, validate and test the AutoModelForImageClassification model
train_and_eval_model(
    model=model,
    model_card=model_card,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=num_epochs,
)

Epoch [1/5], Train Loss: 1.9503, Train Acc: 0.1172
Epoch [1/5], Val Loss: 1.9338, Val Acc: 0.1562
Best model updated at epoch 0 with val acc: 0.1562
Epoch [2/5], Train Loss: 1.8478, Train Acc: 0.5312
Epoch [2/5], Val Loss: 1.8971, Val Acc: 0.3281
Best model updated at epoch 1 with val acc: 0.3281
Epoch [3/5], Train Loss: 1.7559, Train Acc: 0.7969
Epoch [3/5], Val Loss: 1.8617, Val Acc: 0.4453
Best model updated at epoch 2 with val acc: 0.4453
Epoch [4/5], Train Loss: 1.6605, Train Acc: 0.9219
Epoch [4/5], Val Loss: 1.8227, Val Acc: 0.5156
Best model updated at epoch 3 with val acc: 0.5156
Epoch [5/5], Train Loss: 1.5614, Train Acc: 0.9688
Epoch [5/5], Val Loss: 1.7789, Val Acc: 0.5781
Best model updated at epoch 4 with val acc: 0.5781
Test Loss: 1.8062, Test Acc: 0.5391


## MOBILENETV2

In [None]:
"google/mobilenet_v2_1.0_224"

## **MAMBA T 1 K**

In [21]:
# Assign the model card name
model_card = "nvidia/MambaVision-T-1K"

# Define the model with the MambaClassifier class.
# The hidden size is looked up by key so that an unregistered model card fails
# here with a KeyError instead of silently passing None to the classifier.
# The backbone is loaded with trust_remote_code=True, i.e. with the modeling code
# provided by the model repository.
model = MambaClassifier(
    AutoModel.from_pretrained(model_card, trust_remote_code=True),
    num_classes=n_classes,
    hidden_dim=MAMBA_HIDDEN_SIZES[model_card],
    )

# Preprocessing settings shared by the training and evaluation transforms,
# taken from the backbone configuration
transform_kwargs = dict(
    input_size=INPUT_RESOLUTION,
    mean=model.config.mean,
    std=model.config.std,
    crop_mode=model.config.crop_mode,
    crop_pct=model.config.crop_pct,
)

# Training transform (is_training=True enables the data augmentation)
transform_train = create_transform(is_training=True, **transform_kwargs)
# Evaluation transform, used for both the validation and the test sets
transform_val = create_transform(is_training=False, **transform_kwargs)

# Prepare the dataloaders for training, validation and testing
# (only the training loader is shuffled)
dataset = ImageDataset(dataframe=train, image_dir=IMAGE_FOLDER, transform=transform_train)
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
dataset = ImageDataset(dataframe=val, image_dir=IMAGE_FOLDER, transform=transform_val)
val_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
dataset = ImageDataset(dataframe=test, image_dir=IMAGE_FOLDER, transform=transform_val)
test_loader = DataLoader(dataset, batch_size=BATCH_SIZE)

In [22]:
# Training hyper-parameters: epoch count, loss function and optimizer
num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Fine-tune, validate and test the MambaClassifier model built in the previous cell
train_and_eval_model(
    model=model,
    model_card=model_card,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=num_epochs,
)



Epoch [1/5], Train Loss: 1.8855, Train Acc: 0.2797
Epoch [1/5], Val Loss: 1.7303, Val Acc: 0.5149
Best model updated at epoch 0 with val acc: 0.5149
Epoch [2/5], Train Loss: 1.7018, Train Acc: 0.5290
Epoch [2/5], Val Loss: 1.4372, Val Acc: 0.7015
Best model updated at epoch 1 with val acc: 0.7015
Epoch [3/5], Train Loss: 1.4369, Train Acc: 0.6781
Epoch [3/5], Val Loss: 1.0409, Val Acc: 0.8433
Best model updated at epoch 2 with val acc: 0.8433
Epoch [4/5], Train Loss: 1.1507, Train Acc: 0.7216
Epoch [4/5], Val Loss: 0.7072, Val Acc: 0.8731
Best model updated at epoch 3 with val acc: 0.8731
Epoch [5/5], Train Loss: 0.9697, Train Acc: 0.7414
Epoch [5/5], Val Loss: 0.5560, Val Acc: 0.8806
Best model updated at epoch 4 with val acc: 0.8806
Test Loss: 0.7440, Test Acc: 0.7785


## **MAMBA B 21 K**

In [14]:
# Assign the model card name
model_card = "nvidia/MambaVision-B-21K"

# Define the model with the MambaClassifier class.
# The hidden size is looked up by key so that an unregistered model card fails
# here with a KeyError instead of silently passing None to the classifier.
# The backbone is loaded with trust_remote_code=True, i.e. with the modeling code
# provided by the model repository.
model = MambaClassifier(
    AutoModel.from_pretrained(model_card, trust_remote_code=True),
    num_classes=n_classes,
    hidden_dim=MAMBA_HIDDEN_SIZES[model_card],
    )

# Preprocessing settings shared by the training and evaluation transforms,
# taken from the backbone configuration
transform_kwargs = dict(
    input_size=INPUT_RESOLUTION,
    mean=model.config.mean,
    std=model.config.std,
    crop_mode=model.config.crop_mode,
    crop_pct=model.config.crop_pct,
)

# Training transform (is_training=True enables the data augmentation)
transform_train = create_transform(is_training=True, **transform_kwargs)
# Evaluation transform, used for both the validation and the test sets
transform_val = create_transform(is_training=False, **transform_kwargs)

# Prepare the dataloaders for training, validation and testing
# (only the training loader is shuffled)
dataset = ImageDataset(dataframe=train, image_dir=IMAGE_FOLDER, transform=transform_train)
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
dataset = ImageDataset(dataframe=val, image_dir=IMAGE_FOLDER, transform=transform_val)
val_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
dataset = ImageDataset(dataframe=test, image_dir=IMAGE_FOLDER, transform=transform_val)
test_loader = DataLoader(dataset, batch_size=BATCH_SIZE)

In [15]:
# Training hyper-parameters: epoch count, loss function and optimizer
num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Fine-tune, validate and test the MambaClassifier model built in the previous cell
train_and_eval_model(
    model=model,
    model_card=model_card,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=num_epochs,
)

Epoch [1/5], Train Loss: 1.9823, Train Acc: 0.1172
Epoch [1/5], Val Loss: 1.9017, Val Acc: 0.2422
Best model updated at epoch 0 with val acc: 0.2422
Epoch [2/5], Train Loss: 1.8869, Train Acc: 0.1875
Epoch [2/5], Val Loss: 1.8000, Val Acc: 0.3438
Best model updated at epoch 1 with val acc: 0.3438
Epoch [3/5], Train Loss: 1.8043, Train Acc: 0.3359
Epoch [3/5], Val Loss: 1.7076, Val Acc: 0.4297
Best model updated at epoch 2 with val acc: 0.4297
Epoch [4/5], Train Loss: 1.6682, Train Acc: 0.4766
Epoch [4/5], Val Loss: 1.6223, Val Acc: 0.5156
Best model updated at epoch 3 with val acc: 0.5156
Epoch [5/5], Train Loss: 1.5941, Train Acc: 0.5703
Epoch [5/5], Val Loss: 1.5438, Val Acc: 0.5938
Best model updated at epoch 4 with val acc: 0.5938
Test Loss: 1.6233, Test Acc: 0.4688
