In [1]:
from torch import nn
from torchvision import transforms
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau

from ml_tools.ML_vision_datasetmaster import DragonDatasetVision
from ml_tools.ML_trainer import DragonTrainer
from ml_tools.ML_callbacks import DragonModelCheckpoint, DragonEarlyStopping, DragonLRScheduler
from ml_tools.ML_utilities import inspect_model_architecture
from ml_tools.ML_configuration import MultiClassImageClassificationMetricsFormat, FinalizeMultiClassImageClassification
from ml_tools import custom_logger

from rootpaths import PM
from visual_ccc.gradcam import custom_alexnet, SIZE_REQUIREMENT

## Multiclass classification: Dendrites, Spheroids, Alloys

In [2]:
VAL_SIZE = 0.2
TEST_SIZE = 0.1
RANDOM_STATE = 101

vision_dataset = DragonDatasetVision.from_folder(PM.original_images)

vision_dataset.split_data(val_size=VAL_SIZE, 
                          test_size=TEST_SIZE,
                          random_state=RANDOM_STATE)

vision_dataset.configure_transforms(resize_size=int(1.2*SIZE_REQUIREMENT),
                                    crop_size=SIZE_REQUIREMENT,
                                    mean=None, std=None,
                                    pre_transforms=[transforms.Grayscale(num_output_channels=1)])

train_dataset, validation_dataset, test_dataset = vision_dataset.get_datasets()


üêâ 2025-11-10 12:38 [‚úÖ [32mINFO[0m] - Found 112 images in 3 classes.[0m

üêâ 2025-11-10 12:38 [‚úÖ [32mINFO[0m] - Test set created with 12 images.[0m

üêâ 2025-11-10 12:38 [‚úÖ [32mINFO[0m] - Data split into: 
- Training: 78 images 
- Validation: 22 images[0m

üêâ 2025-11-10 12:38 [‚úÖ [32mINFO[0m] - Image transforms configured and applied.[0m


In [25]:
class_map = vision_dataset.save_class_map(save_dir=PM.artifacts)

vision_dataset.save_transform_recipe(filepath=PM.transform_recipe_file)


üêâ 2025-11-10 13:09 [‚úÖ [32mINFO[0m] - Log saved as: 'Class_to_Index'[0m

üêâ 2025-11-10 13:09 [‚úÖ [32mINFO[0m] - Transform recipe saved as 'transform_recipe.json'.[0m


In [26]:
INITIAL_LR = 0.0002
SCHEDULER_PATIENCE = 2
# STOP_PATIENCE = 12

# Model
model = custom_alexnet()
original_in_features: int = model.classifier[6].in_features # type: ignore
model.classifier[6] = nn.Linear(in_features=original_in_features, out_features=3, bias=True) # 3 class classification

inspect_model_architecture(model=model, save_dir=PM.artifacts)

# Optimizer
optimizer = AdamW(params=model.parameters(), lr=INITIAL_LR)

# Trainer
trainer = DragonTrainer(model=model,
                    train_dataset=train_dataset,
                    validation_dataset=validation_dataset,
                    kind="multiclass image classification",
                    optimizer=optimizer,
                    criterion="auto",
                    device="cuda:0",
                    checkpoint_callback=DragonModelCheckpoint(save_dir=PM.checkpoints, mode="min"),
                    early_stopping_callback=None,
                    # early_stopping_callback=DragonEarlyStopping(patience=STOP_PATIENCE, mode="min"),
                    lr_scheduler_callback=DragonLRScheduler(scheduler=ReduceLROnPlateau(optimizer=optimizer, 
                                                                                        mode="min",
                                                                                        factor=0.7,
                                                                                        patience=SCHEDULER_PATIENCE))
                    )


üêâ 2025-11-10 13:09 [‚úÖ [32mINFO[0m] - Model architecture summary saved to 'model_architecture_summary.txt'[0m


In [27]:
BATCH_SIZE = 2

history = trainer.fit(save_dir=PM.artifacts, epochs=100, batch_size=BATCH_SIZE)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 2/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 3/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 4/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 5/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:09 [‚úÖ [32mINFO[0m] - Epoch 5: Learning rate changed to 0.000140[0m


Epoch 6/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 7/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 8/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 9/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 10/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 11/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 12/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 13/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 14/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:10 [‚úÖ [32mINFO[0m] - Epoch 14: Learning rate changed to 0.000098[0m


Epoch 15/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 16/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 17/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 18/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 19/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 20/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:10 [‚úÖ [32mINFO[0m] - Epoch 20: Learning rate changed to 0.000069[0m


Epoch 21/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 22/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 23/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:10 [‚úÖ [32mINFO[0m] - Epoch 23: Learning rate changed to 0.000048[0m


Epoch 24/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 25/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 26/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 27/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 28/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 29/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 30/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 31/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 32/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 33/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 33: Learning rate changed to 0.000034[0m


Epoch 34/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 35/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 36/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 37/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 38/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 39/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 40/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 40: Learning rate changed to 0.000024[0m


Epoch 41/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 42/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 43/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 43: Learning rate changed to 0.000016[0m


Epoch 44/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 45/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 46/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 46: Learning rate changed to 0.000012[0m


Epoch 47/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 48/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 49/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 49: Learning rate changed to 0.000008[0m


Epoch 50/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 51/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 52/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 52: Learning rate changed to 0.000006[0m


Epoch 53/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 54/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 55/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 55: Learning rate changed to 0.000004[0m


Epoch 56/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 57/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 58/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 58: Learning rate changed to 0.000003[0m


Epoch 59/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 60/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 61/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:11 [‚úÖ [32mINFO[0m] - Epoch 61: Learning rate changed to 0.000002[0m


Epoch 62/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 63/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 64/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 64: Learning rate changed to 0.000001[0m


Epoch 65/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 66/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 67/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 67: Learning rate changed to 0.000001[0m


Epoch 68/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 69/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 70/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 70: Learning rate changed to 0.000001[0m


Epoch 71/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 72/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 73/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 73: Learning rate changed to 0.000000[0m


Epoch 74/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 75/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 76/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 76: Learning rate changed to 0.000000[0m


Epoch 77/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 78/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 79/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 79: Learning rate changed to 0.000000[0m


Epoch 80/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 81/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 82/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 82: Learning rate changed to 0.000000[0m


Epoch 83/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 84/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 85/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 85: Learning rate changed to 0.000000[0m


Epoch 86/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 87/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 88/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 88: Learning rate changed to 0.000000[0m


Epoch 89/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 90/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 91/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 91: Learning rate changed to 0.000000[0m


Epoch 92/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 93/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 94/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:12 [‚úÖ [32mINFO[0m] - Epoch 94: Learning rate changed to 0.000000[0m


Epoch 95/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 96/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 97/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:13 [‚úÖ [32mINFO[0m] - Epoch 97: Learning rate changed to 0.000000[0m


Epoch 98/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 99/100:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 100/100:   0%|          | 0/39 [00:00<?, ?it/s]


üêâ 2025-11-10 13:13 [‚úÖ [32mINFO[0m] - üìâ Loss plot saved as 'loss_plot.svg'[0m


In [28]:
train_log = {
    "validation size": VAL_SIZE,
    "test size": TEST_SIZE,
    "images per dataset": vision_dataset.images_per_dataset(),
    "random state": RANDOM_STATE,
    "initial lr": INITIAL_LR,
    "scheduler patience": SCHEDULER_PATIENCE,
    # "stop patience": STOP_PATIENCE,
    "batch size": BATCH_SIZE,
    "history": history
}

custom_logger(data=train_log,
              save_directory=PM.results,
              log_name="train_log",
              dict_as="json")


üêâ 2025-11-10 13:13 [‚úÖ [32mINFO[0m] - Log saved as: 'train_log_20251110_131318'[0m


In [32]:
trainer.evaluate(
    save_dir=PM.metrics, 
    model_checkpoint="latest",
    classification_threshold=0.60,
    test_data=test_dataset,
    val_format_configuration=MultiClassImageClassificationMetricsFormat(cmap='BuGn',
                                                                    ROC_PR_line="darkorange"),
    test_format_configuration=MultiClassImageClassificationMetricsFormat(cmap='BuPu',
                                                                     ROC_PR_line="forestgreen")
)


üêâ 2025-11-10 13:21 [‚úÖ [32mINFO[0m] - Evaluating on validation dataset. Metrics will be saved to 'Validation_Metrics'[0m

üêâ 2025-11-10 13:21 [‚úÖ [32mINFO[0m] - Loading checkpoint from 'epoch37_PyModelCheckpoint_0.3277.pth'...[0m

üêâ 2025-11-10 13:21 [‚úÖ [32mINFO[0m] - Restored training history up to epoch 37.[0m

üêâ 2025-11-10 13:21 [‚úÖ [32mINFO[0m] - Restored LR scheduler state for: ReduceLROnPlateau[0m

üêâ 2025-11-10 13:21 [‚úÖ [32mINFO[0m] - Restored DragonModelCheckpoint 'best' score to: 0.3277[0m

üêâ 2025-11-10 13:21 [‚úÖ [32mINFO[0m] - Checkpoint loaded. Resuming training from epoch 38.[0m

üêâ 2025-11-10 13:22 [‚úÖ [32mINFO[0m] - üìù Classification report saved as 'classification_report.txt'[0m

üêâ 2025-11-10 13:22 [‚úÖ [32mINFO[0m] - üìä Report heatmap saved as 'classification_report_heatmap.svg'[0m

üêâ 2025-11-10 13:22 [‚úÖ [32mINFO[0m] - ‚ùáÔ∏è Confusion matrix saved as 'confusion_matrix.svg'[0m

üêâ 2025-11-10 13:22 [‚úÖ 

In [None]:
finalizer = FinalizeMultiClassImageClassification(filename="DendritesSpheroids", class_map=class_map)

trainer.finalize_model_training(model_checkpoint="current",
                                save_dir=PM.artifacts,
                                finalize_config=finalizer)