## Console for running classifier training

In [None]:
# Create datasets and dataloaders

from torch.nn import Sequential
from torchvision.transforms.v2 import RandomHorizontalFlip

from maviratrain.data.classification_dataset import make_training_dataloaders

# Location of the training split.
# Alternative location, kept for reference (commented out):
#     "/home/danny/mavira/FashionTraining/data/classifier/"
#     "classifier354-r2-s2-n2/train/"
train_path = "/mnt/disks/localssd/data/classifier362-r2-s3-n4/train/"

# Location of the validation split.
# Alternative location, kept for reference (commented out):
#     "/home/danny/mavira/FashionTraining/data/classifier/"
#     "classifier354-r2-s2-n2/val/"
val_path = "/mnt/disks/localssd/data/classifier362-r2-s3-n4/val/"

# Additional transforms to apply to the data: a horizontal flip with
# probability 0.5, wrapped in a Sequential container
transforms = Sequential(RandomHorizontalFlip(p=0.5))

# Non-default dataloader parameters (defaults are found in
# maviratrain.utils.constants); the same dict is passed for both splits
dataloader_params = dict(batch_size=512, num_workers=8)

# Build the PyTorch datasets/dataloaders for the train and val splits
(
    train_dataset,
    train_dataloader,
    val_dataset,
    val_dataloader,
) = make_training_dataloaders(
    train_data_path=train_path,
    val_data_path=val_path,
    train_dataloader_params=dataloader_params,
    val_dataloader_params=dataloader_params,
    transforms=transforms,
)

# Shape of element 0 of the first training sample, as a plain list
# (presumably the input tensor of an (input, label) pair -- TODO confirm)
input_dims = [*train_dataset[0][0].shape]

In [2]:
# # Create ViT model

# from maviratrain.models.classifier_model import create_vit

# vit_kwargs = {
#     "image_size": train_dataset[0][0].shape[-1],
#     "patch_size": 32,
#     "num_layers": 8,
#     "num_heads": 8,
#     "hidden_dim": 320,
#     "mlp_dim": 1024,
#     "dropout": 0.2,
#     "attention_dropout": 0.2,
#     "representation_size": None,
#     "num_classes": len(train_dataset.classes),
# }
# model = create_vit(vit_kwargs=vit_kwargs)

In [3]:
# # Create EfficientNet-B3 model

# from torchvision.models import EfficientNet_B3_Weights

# from maviratrain.models.classifier_model import create_efficientnet_b3

# model = create_efficientnet_b3(
#     # weights=None,
#     weights=EfficientNet_B3_Weights.IMAGENET1K_V1,
#     num_classes=len(train_dataset.classes),
# )

In [4]:
# # Resume training an EfficientNet-B3 model from a checkpoint

# from maviratrain.models.classifier_model import create_efficientnet_b3

# model = create_efficientnet_b3(
#     weights=(
#         "/home/danny/mavira/FashionTraining/checkpoints/"
#         "temp24_S42816_A8.131024986505508_T2025-01-15T02:06:36+00:00.pt"
#     ),
#     # weights=EfficientNet_B3_Weights.IMAGENET1K_V1,
#     num_classes=len(train_dataset.classes),
# )

In [5]:
# # Create ShuffleNetV2 model

# from torchvision.models import shufflenet_v2_x0_5

# model = shufflenet_v2_x0_5(num_classes=len(train_dataset.classes)).to("cuda")

In [6]:
# Create SqueezeNet 1.1 model

from torchvision.models import squeezenet1_1

# Build SqueezeNet 1.1 with one output per class of the training dataset,
# then move it to the "cuda" device
model = squeezenet1_1(num_classes=len(train_dataset.classes))
model = model.to("cuda")

In [8]:
# # Create TestModel model

# from maviratrain.models.classifier_model import TestModel

# model = TestModel(
#     input_dims=input_dims, num_classes=len(train_dataset.classes)
# )

In [9]:
# Create optimizers and learning rate schedulers

from torch.optim import AdamW, lr_scheduler

# Epoch budget: total length of training and the warmup portion of it
total_epochs = 100
warmup_epochs = 10

# AdamW over all model parameters, using the fused implementation
optimizer = AdamW(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    eps=1e-10,
    weight_decay=1e-2,
    fused=True,
)

# Warmup schedule: linear ramp of the lr factor from 0.1 to 1 over
# warmup_epochs scheduler steps
scheduler1 = lr_scheduler.LinearLR(
    optimizer=optimizer,
    start_factor=0.1,
    end_factor=1,
    total_iters=warmup_epochs,
    last_epoch=-1,
)

# Main schedule: cosine annealing with T_max=total_epochs, created as if
# warmup_epochs steps had already been taken
# NOTE(review): per the PyTorch docs a scheduler performs an initial step on
# construction and writes the optimizer's lr, so scheduler2 (built second,
# with last_epoch=warmup_epochs) presumably overwrites the warmup start lr
# set by scheduler1 -- confirm that Trainer accounts for this.
scheduler2 = lr_scheduler.CosineAnnealingLR(
    optimizer=optimizer,
    T_max=total_epochs,
    last_epoch=warmup_epochs,
)

# Bundle passed to the Trainer: optimizer first, then the two schedulers
optimization = [optimizer, scheduler1, scheduler2]

In [10]:
# Create loss function

from torch.nn import CrossEntropyLoss

# Cross-entropy summed over the batch (reduction="sum"), with label
# smoothing of 0.2 and no per-class weights
loss_fn = CrossEntropyLoss(label_smoothing=0.2, reduction="sum", weight=None)

In [11]:
# Set up Trainer

from maviratrain.train.train_classifier import Trainer

# Hand the train/val dataloaders, the optimizer/scheduler list and the loss
# function to a Trainer instance
trainer = Trainer(
    loss_fn=loss_fn,
    optimization=optimization,  # type: ignore
    loaders=[train_dataloader, val_dataloader],  # type: ignore
)

In [12]:
# Train model

# Run training, passing total_epochs as n_epochs and a val_interval of 5;
# train() returns three values, stored as the (re-bound) model plus
# n_steps_trained and n_epochs_trained
model, n_steps_trained, n_epochs_trained = trainer.train(
    n_epochs=total_epochs,
    val_interval=5,
    model=model,
)

22:42:55 - Val Epoch: 0        Loss: 5.8686        Accuracy: 0.32        Top-5 Accuracy: 1.41
22:47:20 - Train Epoch: 1        Loss: 5.8621        Accuracy: 1.13        Top-5 Accuracy: 4.31        Time: 265.02
22:48:27 - Val Epoch: 1        Loss: 5.7882        Accuracy: 1.63        Top-5 Accuracy: 6.23
22:52:52 - Train Epoch: 2        Loss: 5.8359        Accuracy: 1.68        Top-5 Accuracy: 6.33        Time: 264.72
22:54:00 - Val Epoch: 2        Loss: 5.7643        Accuracy: 2.20        Top-5 Accuracy: 7.55
22:58:25 - Train Epoch: 3        Loss: 5.8155        Accuracy: 2.04        Top-5 Accuracy: 7.57        Time: 265.07
22:59:33 - Val Epoch: 3        Loss: 5.7500        Accuracy: 2.56        Top-5 Accuracy: 8.68
23:03:57 - Train Epoch: 4        Loss: 5.7971        Accuracy: 2.48        Top-5 Accuracy: 8.77        Time: 264.30
23:05:05 - Val Epoch: 4        Loss: 5.7378        Accuracy: 2.42        Top-5 Accuracy: 9.12
23:09:29 - Train Epoch: 5        Loss: 5.7726        Accuracy: 2.9