In [None]:
# GENERAL
# TODO : Compléter toutes les parties en MARKDOWN
# TODO : IF NEEDED créer une data aug avec albumentations
# TODO : Lire l'extension devcontainers

# EXPERIMENTS
# TODO : Ajouter une card dans le tensorboard pour les paramètres du modèle
# TODO : warning PIL decompression bomb warning ,
# TODO : Tester le batch size de 16

# README

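**TODO** : Ce notebook compare plusieurs modèles de classification d'images (Google ViT, MambaVision-T-1K et MambaVision-B-21K) sur un jeu de données d'images de produits répartis en 7 catégories. L'objectif est d'entraîner, de valider et de tester chaque modèle avec le même pipeline PyTorch, puis de comparer leurs performances (perte, exactitude, temps d'exécution) dans TensorBoard.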

# PRE-REQUIS

Ce bloc contient tout ce qui est nécessaire pour le fonctionnement des expériences.

## Imports & Configurations

In [2]:
import os
import warnings
from time import time
from datetime import datetime

import pandas as pd
from PIL import Image

import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
from timm.data.transforms_factory import create_transform

from transformers import AutoModelForImageClassification, AutoImageProcessor, AutoModel

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

from constants import ROOT_FOLDER, IMAGE_FOLDER, ARTIFACTS_FOLDER, DATASET_PATH
from constants import SEED, VAL_SIZE, TEST_SIZE, BATCH_SIZE, SAMPLING, INPUT_RESOLUTION
from constants import MAMBA_HIDDEN_SIZES

In [3]:
# Gestion des avertissements
warnings.filterwarnings("ignore", category=FutureWarning)

In [4]:
# Report the PyTorch / CUDA / cuDNN versions seen by this kernel
for _label, _version in (
    ("Torch version: ", torch.__version__),
    ("Cuda version: ", torch.version.cuda),
    ("CUDNN version: ", torch.backends.cudnn.version()),
):
    print(_label, _version)

# Prefer the GPU when one is visible, otherwise fall back to the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")

Torch version:  2.6.0+cu124
Cuda version:  12.4
CUDNN version:  90100
Using device: cuda


## Préparation des données

### Chargement du dataset

In [5]:
# Load the cleaned dataset pickled by the previous project and keep only the image/label columns
# NOTE(review): pd.read_pickle can execute arbitrary code - only load pickles from a trusted source.
df = pd.read_pickle(DATASET_PATH)
df = df.drop(columns=["product_name", "description"])
print(f"Dataset shape: {df.shape}")
print(f"Dataset columns: {df.columns}")

# Fit the label encoder on the raw class names and expose the class list and count
le = LabelEncoder().fit(df["class"])
CLASSES = le.classes_.tolist()
N_CLASSES = len(CLASSES)
print(f"Number of classes: {N_CLASSES}")
print(f"Classes: {CLASSES}")

# Replace the class names with their integer codes
df["class"] = le.transform(df["class"])

Dataset shape: (1050, 2)
Dataset columns: Index(['image', 'class'], dtype='object')
Number of classes: 7
Classes: ['Baby Care', 'Beauty and Personal Care', 'Computers', 'Home Decor & Festive Needs', 'Home Furnishing', 'Kitchen & Dining', 'Watches']


### Séparation des données (train/validation/test)

In [6]:
# Hold out the test set first, then carve the validation set out of the remainder.
# Both splits are stratified on the class label and seeded with SEED.
X_temp, X_test, y_temp, y_test = train_test_split(
    df["image"],
    df["class"],
    test_size=TEST_SIZE,
    random_state=SEED,
    stratify=df["class"],
    shuffle=True,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=VAL_SIZE, random_state=SEED, stratify=y_temp, shuffle=True
)


def _build_split(images, labels):
    """Join the image and label columns of one split and optionally subsample it.

    When SAMPLING is truthy, at most SAMPLING rows are drawn. The draw is seeded
    with SEED so that re-running the notebook yields the same subset, and it is
    capped at the split size so that a small split does not raise.
    """
    frame = pd.concat([images, labels], axis=1)
    if SAMPLING:
        frame = frame.sample(n=min(SAMPLING, len(frame)), random_state=SEED)
    return frame


# Concat X and y for each set
train = _build_split(X_train, y_train)
val = _build_split(X_val, y_val)
test = _build_split(X_test, y_test)

# Print the shape of each set
print(f"Train shape: {train.shape}")
print(f"Val shape: {val.shape}")
print(f"Test shape: {test.shape}")

Train shape: (32, 2)
Val shape: (32, 2)
Test shape: (32, 2)


In [7]:
# Preview the first rows of the training split (head() defaults to 5 rows)
train.head()

Unnamed: 0,image,class
301,23704dd51c975e845c574b044aae0a9f.jpg,1
1033,ace154420a51fad090b3543995630051.jpg,4
200,168618e93387ad7171d4e4e1eeff9d1a.jpg,3
880,fc3eb6ffed257270c26943e9f9c347b9.jpg,4
123,5c77aa1fc09901ae07c392c152a70e41.jpg,3


In [8]:
# Preview the first rows of the validation split (head() defaults to 5 rows)
val.head()

Unnamed: 0,image,class
1036,5a54c78b52c984e56500809e5bc27ae1.jpg,2
661,8c90f3ca64ea1a8ce104c3c3f5fc173f.jpg,3
297,6ed34e1ab886e8a702ec59dd66ba4dff.jpg,0
429,9fb8662af03c957ade34d4c816d4e903.jpg,2
773,109e235d4838002246599f987d935c21.jpg,0


In [9]:
# Preview the first rows of the test split (head() defaults to 5 rows)
test.head()

Unnamed: 0,image,class
403,b1c7aa8e0f50e7ee73ed30d12af1a961.jpg,2
682,91795f4b0e4aac27314477a91b63bfb7.jpg,1
583,35a68b44bef47a809314493d28535b9d.jpg,6
709,dbca6ab4ebf827884073e2890388b744.jpg,0
869,d218c32df572e82db50faecd62179db2.jpg,4


## Classes et Fonctions

### DataLoader

[ ] ***TODO*** :

In [10]:
# Block DataLoader
from pathlib import PosixPath

class ImageDataset(Dataset):
    """Map-style dataset returning ``(image, label)`` pairs for images listed in a DataFrame.

    The first DataFrame column holds the image file names (relative to
    ``image_dir``) and the second column holds the labels. Each image is
    preprocessed by ``processor`` when one is given, otherwise by ``transform``
    when one is given; with neither, the RGB PIL image is returned unchanged.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        image_dir: PosixPath,
        # Quoted annotation: documentation only, so the class can be defined
        # without evaluating the transformers name.
        processor: "AutoImageProcessor" = None,
        transform: callable = None,
    ):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame containing image file names and labels.
            image_dir (PosixPath): Directory where images are stored.
            processor (AutoImageProcessor, optional): Hugging Face processor for image preprocessing. Defaults to None.
            transform (callable, optional): Optional transform to be applied on a sample. Defaults to None.
        """
        super().__init__()
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.processor = processor
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.dataframe)

    @staticmethod
    def _load_image(path):
        """Read the image stored at ``path`` and return it converted to RGB."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of waiting for garbage collection.
        with Image.open(path) as raw_image:
            return raw_image.convert("RGB")  # Ensure consistent color format

    def __getitem__(self, idx):
        """Return the preprocessed image and its label (as a tensor) at position ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Image file names are expected in the first column, labels in the second
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        label = self.dataframe.iloc[idx, 1]

        image = self._load_image(img_name)

        if self.processor:
            inputs = self.processor(images=image, return_tensors="pt")
            # Drop only the leading batch axis; a bare squeeze() would also
            # remove any other axis of size 1 (e.g. a single channel).
            image = inputs["pixel_values"].squeeze(0)
        elif self.transform:
            image = self.transform(image)

        # Convert label to tensor
        label = torch.tensor(label)

        return image, label

### MambaClassifier

[ ] ***TODO*** : L'ajout de la classe MambaClassifier permet de créer un classificateur basé sur le modèle MambaVision en tenant compte de ses spécificités. Cette classe hérite de la classe `nn.Module` de PyTorch et encapsule le modèle MambaVision, permettant ainsi de l'utiliser comme un classificateur dans le cadre d'une tâche de classification d'images.

In [19]:
# Block MambaClassifier
class MambaClassifier(nn.Module):
    """Classification head on top of a backbone that returns a pooled feature vector.

    The backbone must expose a ``config`` attribute and its forward pass must
    return a 2-tuple whose first element is the pooled features of width
    ``hidden_dim``. Optional fully connected blocks (Linear -> ReLU -> Dropout)
    are applied before the final linear classifier.
    """

    def __init__(
        self,
        backbone: nn.Module,
        num_classes: int,
        hidden_dim: int,  # The hidden dimension of the backbone is stored in the MAMBA_HIDDEN_SIZES dict with the model card as the key
        fc_layer: int = None,  # Number of fully connected blocks inserted before the classifier
    ):
        """
        Args:
            backbone (nn.Module): Feature extractor exposing a ``config`` attribute.
            num_classes (int): Number of output classes.
            hidden_dim (int): Width of the pooled features returned by the backbone.
            fc_layer (int, optional): Number of Linear/ReLU/Dropout blocks to add
                before the classifier. Defaults to None (no extra block).

        Raises:
            ValueError: If ``hidden_dim`` is None.
        """
        super().__init__()
        if hidden_dim is None:
            # Fail early with an explicit message instead of an opaque error inside nn.Linear
            raise ValueError(
                "hidden_dim must be provided (is the model card registered in MAMBA_HIDDEN_SIZES?)"
            )
        self.backbone = backbone
        self.config = self.backbone.config
        if fc_layer:
            # The attribute only exists when extra blocks are requested; forward() relies on that
            self.fc_layers = nn.ModuleList()
            for _ in range(fc_layer):
                self.fc_layers.extend(
                    [nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Dropout(0.1)]
                )
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def create_transform(self, training: bool, auto_augment=None):
        """Build the image transform from the backbone config.

        Inside this method ``create_transform`` resolves to the module-level
        function imported from timm, not to this method.

        Args:
            training (bool): Forwarded as ``is_training`` to select the training
                or the evaluation variant of the transform.
            auto_augment (str, optional): Auto-augment policy forwarded as is,
                e.g. "rand-m9-mstd0.5-inc1". Defaults to None.

        Returns:
            The transform object returned by timm's ``create_transform``.
        """
        transform = create_transform(
            input_size=INPUT_RESOLUTION,
            is_training=training,
            mean=self.config.mean,
            std=self.config.std,
            crop_mode=self.config.crop_mode,
            crop_pct=self.config.crop_pct,
            auto_augment=auto_augment,
        )
        return transform

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        # The backbone returns (pooled features, extra output); only the first is used
        out_avg_pool, _ = self.backbone(x)
        if hasattr(self, "fc_layers"):
            for layer in self.fc_layers:
                out_avg_pool = layer(out_avg_pool)
        logits = self.classifier(out_avg_pool)
        return logits

### TorchPipeline

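[ ] ***TODO*** : La classe `TorchPipeline` orchestre une expérience de bout en bout : création des `DataLoader` (train/validation/test) à partir de `ImageDataset`, boucle d'entraînement et de validation avec sauvegarde du meilleur modèle, évaluation sur le jeu de test (rapport de classification et matrice de confusion) et journalisation des métriques dans TensorBoard. Les transformations d'images (redimensionnement, recadrage, conversion en tenseur, normalisation) ne sont pas définies dans cette classe : elles sont déléguées soit au processeur Hugging Face (`AutoImageProcessor`), soit aux transformations `timm` créées par `MambaClassifier.create_transform`.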

In [12]:
class TorchPipeline:
    """Run one experiment end to end: dataloaders, training/validation, and testing.

    Metrics are logged to TensorBoard under ``<root_folder>/runs/<experiment_id>``
    and the weights with the best validation accuracy are saved to
    ``<artifacts_folder>/<experiment_id>.pth``.
    """

    def __init__(self, model_card, model, processor=None, train_transform=None, test_transform=None):
        """
        Args:
            model_card (str): Model card name; its last "/"-separated part is used in the experiment ID.
            model: Model to train. Unless it is a MambaClassifier, its output must expose ``.logits``.
            processor (optional): Processor passed to every ImageDataset. Defaults to None.
            train_transform (optional): Transform passed to the training ImageDataset. Defaults to None.
            test_transform (optional): Transform passed to the validation and test ImageDataset. Defaults to None.
        """
        # Initialize attributes from parameters
        self.model_card = model_card
        self.model = model
        self.processor = processor
        self.train_transform = train_transform
        self.test_transform = test_transform

        # Check if the model is a MambaClassifier instance
        self.mamba = "MambaClassifier" in type(model).__name__

        # Initialize FIXED attributes from constants.py
        self.device = DEVICE
        self.root_folder = ROOT_FOLDER
        self.artifacts_folder = ARTIFACTS_FOLDER
        self.dataset_path = DATASET_PATH
        self.image_folder = IMAGE_FOLDER
        self.batch_size = BATCH_SIZE
        self.classes = CLASSES
        self.n_classes = N_CLASSES

        # Initialize empty attributes
        self.train_loader = None
        self.val_loader = None
        self.test_loader = None
        self.writer = None
        self.experiment_id = None

    @staticmethod
    def classification_report_to_markdown(y_true, y_pred, target_names):
        """
        Convert the classification report to a markdown table.

        When the labels seen in ``y_true``/``y_pred`` do not cover every entry of
        ``target_names`` (e.g. a small sampled test set missing a class), the
        labels are assumed to be the integer codes ``0..len(target_names)-1`` so
        that the report can still be built.
        """
        observed = set(y_true) | set(y_pred)
        labels = None
        if len(observed) != len(target_names):
            labels = list(range(len(target_names)))
        report = classification_report(
            y_true,
            y_pred,
            labels=labels,
            target_names=target_names,
            zero_division=0,
            output_dict=True,
        )
        report_df = pd.DataFrame(report).transpose()
        report_df = report_df.drop(columns=["support"])
        report_df = report_df.rename_axis("Classes").reset_index()
        return report_df.to_markdown(index=False)

    def generate_experiment_id(self):
        """
        Generate an experiment ID from the current date and time and the model name.
        """
        return "_".join(
            [datetime.now().strftime("%Y%m%d-%H%M%S"), self.model_card.split("/")[-1]]
        )

    def generate_dataloader(self, train, val, test):
        """Wrap the three DataFrames in ImageDataset objects and create the dataloaders.

        The training split uses ``train_transform`` and is shuffled; the
        validation and test splits use ``test_transform``.
        """
        train_dataset = ImageDataset(dataframe=train, image_dir=self.image_folder, processor=self.processor, transform=self.train_transform)
        val_dataset = ImageDataset(dataframe=val, image_dir=self.image_folder, processor=self.processor, transform=self.test_transform)
        test_dataset = ImageDataset(dataframe=test, image_dir=self.image_folder, processor=self.processor, transform=self.test_transform)

        self.train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        self.val_loader = DataLoader(val_dataset, batch_size=self.batch_size)
        self.test_loader = DataLoader(test_dataset, batch_size=self.batch_size)

    def _create_writer(self, log_dir):
        """Create the TensorBoard writer for ``log_dir``."""
        return SummaryWriter(log_dir)

    def _forward(self, inputs):
        """Return the logits for ``inputs``, reading ``.logits`` for non-Mamba models."""
        outputs = self.model(inputs)
        return outputs if self.mamba else outputs.logits

    def train_and_eval_model(self, criterion, optimizer, num_epochs):
        """Train for ``num_epochs`` epochs, validating after each one.

        The model weights are saved every time the validation accuracy improves.

        Raises:
            RuntimeError: If generate_dataloader() has not been called.
        """
        if self.train_loader is None or self.val_loader is None:
            raise RuntimeError(
                "Dataloaders are not initialized. Call generate_dataloader() before training."
            )

        # Generate the experiment ID and the matching TensorBoard writer
        self.experiment_id = self.generate_experiment_id()
        self.writer = self._create_writer(self.root_folder / "runs" / self.experiment_id)
        # Make sure the checkpoint directory exists before the first save
        os.makedirs(self.artifacts_folder, exist_ok=True)
        checkpoint_path = self.artifacts_folder / f"{self.experiment_id}.pth"

        # Initialize the training metrics
        running_train_time_by_step = 0.0
        running_train_time_by_epoch = 0.0
        running_train_steps = 0

        # Initialize the validation metrics
        best_val_metric = float("-inf")
        running_val_time_by_step = 0.0
        running_val_time_by_epoch = 0.0
        running_val_steps = 0

        # Baseline points: zero cumulative time before the first epoch
        self.writer.add_scalar("TimingByEpoch/train", running_train_time_by_epoch, 0)
        self.writer.add_scalar("TimingByEpoch/validation", running_val_time_by_epoch, 0)

        # Pre-bind the batch variables so the final cleanup works even if no batch was processed
        inputs = labels = outputs = loss = None

        # Move model to the device
        self.model.to(self.device)

        # Training loop
        print("TRAINING EXPERIMENT ID <{}>".format(self.experiment_id))
        print("==========================")
        for epoch in range(num_epochs):
            # --- 1. TRAINING LOOP ---
            self.model.train()
            train_loss, correct_train, total_train = 0.0, 0, 0
            epoch_time = time()
            for inputs, labels in self.train_loader:
                step_time = time()
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                optimizer.zero_grad()
                outputs = self._forward(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # The criterion returns a batch mean; weight it by the batch size
                step_loss = loss.item() * inputs.size(0)
                train_loss += step_loss
                preds_train = outputs.argmax(dim=1)
                correct_train += (preds_train == labels).sum().item()
                total_train += inputs.size(0)
                running_train_steps += 1
                running_train_time_by_step += time() - step_time
                self.writer.add_scalar(
                    "TimingByStep/train", running_train_time_by_step, running_train_steps
                )
                self.writer.add_scalar("LossByStep/train", step_loss, running_train_steps)

            running_train_time_by_epoch += time() - epoch_time
            # Guard against an empty loader
            epoch_train_loss = train_loss / total_train if total_train else float("nan")
            epoch_train_acc = correct_train / total_train if total_train else float("nan")
            # Log training metrics per epoch
            self.writer.add_scalar("LossByEpoch/train", epoch_train_loss, epoch)
            self.writer.add_scalar("Accuracy/train", epoch_train_acc, epoch)
            # Cumulative time is indexed by the number of completed epochs so that
            # it does not overwrite the baseline point logged at step 0
            self.writer.add_scalar("TimingByEpoch/train", running_train_time_by_epoch, epoch + 1)
            stats = f"Epoch [{epoch + 1}/{num_epochs}] | Train_Loss: {epoch_train_loss:.4f} | Train_Acc: {epoch_train_acc:.4f}"

            # --- 2. VALIDATION LOOP ---
            self.model.eval()
            val_loss, correct_val, total_val = 0.0, 0, 0
            epoch_time = time()
            with torch.no_grad():
                for inputs, labels in self.val_loader:
                    step_time = time()
                    inputs = inputs.to(self.device)
                    labels = labels.to(self.device)
                    outputs = self._forward(inputs)
                    val_step_loss = criterion(outputs, labels).item() * inputs.size(0)
                    val_loss += val_step_loss
                    preds = outputs.argmax(dim=1)
                    correct_val += (preds == labels).sum().item()
                    total_val += inputs.size(0)
                    running_val_steps += 1
                    running_val_time_by_step += time() - step_time
                    self.writer.add_scalar(
                        "TimingByStep/validation",
                        running_val_time_by_step,
                        running_val_steps,
                    )
                    # Indexed by the validation step counter (not the training one)
                    self.writer.add_scalar(
                        "LossByStep/validation", val_step_loss, running_val_steps
                    )

            running_val_time_by_epoch += time() - epoch_time
            epoch_val_loss = val_loss / total_val if total_val else float("nan")
            epoch_val_acc = correct_val / total_val if total_val else float("nan")
            # Log validation metrics per epoch (same tag family as the training loss)
            self.writer.add_scalar("LossByEpoch/validation", epoch_val_loss, epoch)
            self.writer.add_scalar("Accuracy/validation", epoch_val_acc, epoch)
            self.writer.add_scalar("TimingByEpoch/validation", running_val_time_by_epoch, epoch + 1)
            stats += f" | Val_Loss: {epoch_val_loss:.4f} | Val_Acc: {epoch_val_acc:.4f}"

            # --- 3. UPDATE BEST MODEL ---
            # Save the model if the validation accuracy is better than the best one
            if epoch_val_acc > best_val_metric:
                best_val_metric = epoch_val_acc
                torch.save(self.model.state_dict(), checkpoint_path)
                stats += " -> Best model updated"
            # Print the stats at the end of each epoch
            print(stats)

        self.writer.flush()
        # Drop the references to the last batch before releasing cached GPU memory
        inputs = labels = outputs = loss = None
        torch.cuda.empty_cache()
        print(f"Training completed. Best validation accuracy: {best_val_metric:.4f}. Running steps training time: {running_train_time_by_epoch:.2f} s.\n")

    def test_model(self, criterion, with_id=None,):
        """Load a saved checkpoint and evaluate it on the test loader.

        Args:
            criterion: Loss function used to compute the test loss.
            with_id (str, optional): Experiment ID of the checkpoint to load.
                Defaults to None, in which case the ID of the last training run
                of this pipeline is used.

        Raises:
            RuntimeError: If generate_dataloader() has not been called.
        """
        if self.test_loader is None:
            raise RuntimeError(
                "Dataloaders are not initialized. Call generate_dataloader() before testing."
            )

        checkpoint_id = self.experiment_id if with_id is None else with_id
        checkpoint_path = self.artifacts_folder / f"{checkpoint_id}.pth"
        try:
            # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
            state_dict = torch.load(checkpoint_path, map_location=self.device)
        except FileNotFoundError:
            if with_id is None:
                print("No id has been provided and no model has been trained yet. Train the model first before testing.")
            else:
                print(f"Model {with_id}.pth not found in {self.artifacts_folder}")
                print("Please provide a valid model ID.")
            return

        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

        # Initialize the test metrics
        test_loss, correct_test, total_test = 0.0, 0, 0
        running_test_steps = 0
        running_test_time_by_step = 0.0
        y_true, y_pred = [], []
        inputs = labels = outputs = None

        print(f"TESTING EXPERIMENT ID <{checkpoint_id}>")
        print("==========================")
        with torch.no_grad():
            for inputs, labels in self.test_loader:
                step_time = time()
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                outputs = self._forward(inputs)
                test_loss += criterion(outputs, labels).item() * inputs.size(0)
                preds = outputs.argmax(dim=1)
                correct_test += (preds == labels).sum().item()
                total_test += inputs.size(0)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(preds.cpu().numpy())
                running_test_time_by_step += time() - step_time
                running_test_steps += 1
                if self.writer:
                    self.writer.add_scalar(
                        "TimingByStep/test", running_test_time_by_step, running_test_steps
                    )

        epoch_test_loss = test_loss / total_test if total_test else float("nan")
        epoch_test_acc = correct_test / total_test if total_test else float("nan")
        if self.writer:
            self.writer.add_scalar("Accuracy/test", epoch_test_acc, 0)

        print(f"Test Loss: {epoch_test_loss:.4f} | Test Acc: {epoch_test_acc:.4f} | Running steps test time: {running_test_time_by_step:.2f} s.")

        # Create the Classification report
        classification_report_md = self.classification_report_to_markdown(
            y_true, y_pred, target_names=self.classes
        )

        # Create the confusion matrix
        cm = ConfusionMatrixDisplay.from_predictions(
            y_true,
            y_pred,
            labels=range(self.n_classes),
            normalize="true",
            display_labels=self.classes,
            xticks_rotation="vertical",
            cmap=plt.cm.Blues,
        )
        # Save the classification report and the confusion matrix in tensorboard
        if self.writer:
            self.writer.add_text("ClassificationReport/test", classification_report_md, 0)
            self.writer.add_figure("ConfusionMatrix/test", cm.figure_, 0)
            self.writer.close()  # Close the writer after the end of the pipeline
        # If no writer is available, print the classification report & show the confusion matrix
        else:
            print(classification_report_md)
            cm.figure_.show()

        # Release GPU memory: moving the module to the CPU frees its GPU tensors even
        # if other references to the model exist, and keeps the pipeline reusable
        inputs = labels = outputs = None
        self.model.to("cpu")
        torch.cuda.empty_cache()



# EXPERIMENTS

## **Google VIT**

In [13]:
# Assign the model card name
model_card = "google/vit-base-patch16-224-in21k"

# Define the model with a fresh classification head sized for our classes.
# ViT is implemented inside transformers itself, so no code from the Hub has to
# be trusted here (trust_remote_code is left at its default).
model = AutoModelForImageClassification.from_pretrained(
    model_card, num_labels=N_CLASSES
)

# Define the image processor
processor = AutoImageProcessor.from_pretrained(model_card)

# Create the TorchPipeline object
pipeline = TorchPipeline(
    model_card=model_card,
    model=model,
    processor=processor,
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


In [14]:
# Training setup: epoch budget, loss function and optimizer
num_epochs = 3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Build the train/val/test dataloaders from the three splits
pipeline.generate_dataloader(train=train, val=val, test=test)
# Train on the training split and validate after every epoch
pipeline.train_and_eval_model(criterion, optimizer, num_epochs)
# Evaluate the checkpoint saved during training on the test split
pipeline.test_model(criterion)

TRAINING EXPERIMENT ID <20250502-194155_vit-base-patch16-224-in21k>
Epoch [1/3] | Train_Loss: 1.9381 | Train_Acc: 0.1562 | Val_Loss: 1.9423 | Val_Acc: 0.1250 -> Best model updated
Epoch [2/3] | Train_Loss: 1.8552 | Train_Acc: 0.5312 | Val_Loss: 1.9340 | Val_Acc: 0.1562 -> Best model updated
Epoch [3/3] | Train_Loss: 1.7899 | Train_Acc: 0.7188 | Val_Loss: 1.9253 | Val_Acc: 0.1875 -> Best model updated
Training completed. Best validation accuracy: 0.1875. Running steps training time: 10.79 s.

TESTING EXPERIMENT ID <20250502-194155_vit-base-patch16-224-in21k>
Test Loss: 1.9142 | Test Acc: 0.2812 | Running steps test time: 0.81 s.


## MOBILENETV2

In [None]:
# TODO: placeholder cell - it only records the model card name; no MobileNetV2 experiment is run here
"google/mobilenet_v2_1.0_224"

## **MAMBA T 1 K**

In [None]:
# Assign the model card name
model_card = "nvidia/MambaVision-T-1K"

# Define the model with the MambaClassifier class.
# The hidden size is read with [] rather than .get(): an unregistered model card
# raises a KeyError here instead of silently passing hidden_dim=None.
model = MambaClassifier(
    AutoModel.from_pretrained(model_card, trust_remote_code=True),
    num_classes=N_CLASSES,
    hidden_dim=MAMBA_HIDDEN_SIZES[model_card],
)

# Define the image transforms (training and evaluation variants)
train_transform = model.create_transform(training=True)  # optionally: auto_augment="rand-m9-mstd0.5-inc1"
test_transform = model.create_transform(training=False)

# Create the TorchPipeline object
pipeline = TorchPipeline(
    model_card=model_card,
    model=model,
    train_transform=train_transform,
    test_transform=test_transform,
)

In [23]:
# Training setup: epoch budget, loss function and optimizer
num_epochs = 3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Build the train/val/test dataloaders from the three splits
pipeline.generate_dataloader(train=train, val=val, test=test)
# Train on the training split and validate after every epoch
pipeline.train_and_eval_model(criterion, optimizer, num_epochs)
# Evaluate the checkpoint saved during training on the test split
pipeline.test_model(criterion)

TRAINING EXPERIMENT ID <20250502-195255_MambaVision-T-1K>
Epoch [1/3] | Train_Loss: 2.0062 | Train_Acc: 0.0625 | Val_Loss: 2.0011 | Val_Acc: 0.0312 -> Best model updated
Epoch [2/3] | Train_Loss: 1.9974 | Train_Acc: 0.1250 | Val_Loss: 1.9874 | Val_Acc: 0.0312
Epoch [3/3] | Train_Loss: 1.9758 | Train_Acc: 0.0625 | Val_Loss: 1.9797 | Val_Acc: 0.0625 -> Best model updated
Training completed. Best validation accuracy: 0.0625. Running steps training time: 5.36 s.

TESTING EXPERIMENT ID <20250502-195255_MambaVision-T-1K>
Test Loss: 1.9905 | Test Acc: 0.0625 | Running steps test time: 0.30 s.


## **MAMBA B 21 K**

In [17]:
# Assign the model card name
model_card = "nvidia/MambaVision-B-21K"

# Define the model with the MambaClassifier class.
# The hidden size is read with [] rather than .get(): an unregistered model card
# raises a KeyError here instead of silently passing hidden_dim=None.
model = MambaClassifier(
    AutoModel.from_pretrained(model_card, trust_remote_code=True),
    num_classes=N_CLASSES,
    hidden_dim=MAMBA_HIDDEN_SIZES[model_card],
)

# Define the image transforms (training and evaluation variants)
train_transform = model.create_transform(training=True)
test_transform = model.create_transform(training=False)

# Create the TorchPipeline object
pipeline = TorchPipeline(
    model_card=model_card,
    model=model,
    train_transform=train_transform,
    test_transform=test_transform,
)

In [18]:
# Training setup: epoch budget, loss function and optimizer
num_epochs = 3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Build the train/val/test dataloaders from the three splits
pipeline.generate_dataloader(train=train, val=val, test=test)
# Train on the training split and validate after every epoch
pipeline.train_and_eval_model(criterion, optimizer, num_epochs)
# Evaluate the checkpoint saved during training on the test split
pipeline.test_model(criterion)

TRAINING EXPERIMENT ID <20250502-194242_MambaVision-B-21K>
Epoch [1/3] | Train_Loss: 2.0714 | Train_Acc: 0.0312 | Val_Loss: 2.1670 | Val_Acc: 0.0312 -> Best model updated
Epoch [2/3] | Train_Loss: 1.9613 | Train_Acc: 0.2500 | Val_Loss: 2.1341 | Val_Acc: 0.0625 -> Best model updated
Epoch [3/3] | Train_Loss: 1.9565 | Train_Acc: 0.1562 | Val_Loss: 2.1045 | Val_Acc: 0.0938 -> Best model updated
Training completed. Best validation accuracy: 0.0938. Running steps training time: 7.05 s.

TESTING EXPERIMENT ID <20250502-194242_MambaVision-B-21K>
Test Loss: 2.0276 | Test Acc: 0.1875 | Running steps test time: 0.59 s.
