In [25]:
# Define backbone of ResNet18 model

import torch.nn as nn  # Neural Network lib
import torchvision


class MyResNet18(nn.Module):
    """
    ResNet18 backbone
    Inherit PyTorch nn Module, define and train my own ResNet from scratch
    num_classes: class num for the model, 91 classes(categories) for COCO dataset
    """

    def __init__(self, num_classes=1000, weights=None):
        """
        Args:
            num_classes (int):
                Number of output channels from the final linear layer.
                For ImageNet classification use 1000; for a custom task like COCO, 91 classes(categories)
            weights (bool):
                If True, loads ImageNet‑pretrained weights.
                If False, train from scratch
        """
        super().__init__()
        # load ResNet18 architecture
        self.model = torchvision.models.resnet18(
            weights=weights)  # Main feature extractor, no pretrained weights
        # replace the final fully connected layer with new classes
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x (torch.Tensor): Input tensor of shape [B, 3, H, W],
                              where B = batch size.

        Returns:
            torch.Tensor: Output logits of shape [B, num_classes].
        """
        return self.model(x)


In [26]:
# Transform, image preprocessing
from torchvision import transforms

train_tf = transforms.Compose([
    transforms.RandomCrop(32, padding=4), # augmentation
    transforms.RandomHorizontalFlip(0.5),   # probability of flipping current image
    transforms.ToTensor(),              # PIL -> PyTorch FloatTensor
    transforms.Normalize(               # data from ImageNet, pixel normalization
        mean=[0.5071, 0.4867, 0.4408],   # official CIFAR100 stats
        std=[0.2675, 0.2565, 0.2761]
    )
])

val_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5071, 0.4867, 0.4408],
        std=[0.2675, 0.2565, 0.2761]
    )
])


In [43]:
from pathlib import Path
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR100

project_root = Path.cwd().parent            # ➜ NeuralRipper/
data_root    = project_root / "data"     # folder that already contains “cifar-100-python/”

train_ds = CIFAR100(root=str(data_root),
                    train=True,
                    download=True,          # first run only
                    transform=train_tf)

val_ds   = CIFAR100(root=str(data_root),
                    train=False,
                    download=False,
                    transform=val_tf)

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True,
                          num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=100, shuffle=False,
                          num_workers=4, pin_memory=True)

print(f"Train / Val sizes ➜ {len(train_ds)} / {len(val_ds)}")


Train / Val sizes ➜ 50000 / 10000


In [44]:
#!/usr/bin/env python3
"""
Training related functions definitions
"""

import os
import logging
from datetime import datetime

import mlflow
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, auc, average_precision_score
from torch.utils.data import DataLoader


# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def get_dataset_paths():
    """
    Get appropriate data paths based on environment variable.

    Returns:
        tuple: (image_directory_path, annotation_file_path)
    """
    use_full = os.environ.get("USE_FULL_DATASET", "0").lower() in ("1", "true", "yes")
    base_dir = os.path.abspath(".")

    if use_full:
        img_dir = os.path.join(base_dir, "data", "coco", "images", "train2017")
        ann_file = os.path.join(base_dir, "data", "coco", "annotations", "instances_train2017.json")
    else:
        img_dir = os.path.join(base_dir, "data", "coco", "subset", "images")
        ann_file = os.path.join(base_dir, "data", "coco", "subset", "annotations", "instances_subset.json")

    return img_dir, ann_file


def select_device():
    """
    Select the appropriate device for training.

    Returns:
        torch.device: The device to use for training
    """
    if torch.backends.mps.is_available():
        device = torch.device("mps")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    logger.info(f"Using device: {device}")
    return device


def setup_mlflow(batch_size, learning_rate, num_epochs, device):
    """
    Set up and configure the MLFlow for experiment tracking.

    Args:
        batch_size (int): Training batch size
        learning_rate (float): Learning rate
        num_epochs (int): Number of training epochs
        device (torch.device): Training device

    Returns:
        MLFlow instance
    """

    '''
    Actual Host where mlflow server deployed
    Start mlflow server using
    mlflow server \
      --backend-store-uri mysql+pymysql://user:pass@localhost:3306/mlflow \
      --default-artifact-root gs://gcs-bucket/mlflow-artifacts \
      --host 0.0.0.0 --port 5000
    '''
    MLFLOW_TRACKING_URI = "http://localhost:5000"
    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
    print(f"Tracking URI set to: {MLFLOW_TRACKING_URI}")
    # the name of the experiment will show up in mlflow, usually one model one experiment
    mlflow.set_experiment("ResNet18-CIFAR100")
    mlflow.start_run(run_name=datetime.now().strftime("%Y%m%d_%H%M%S"))

    mlflow.log_params({
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "epochs": num_epochs,
        "model": "ResNet18",
        "device": device.type
    })


def track_metrics(metrics_dict, epoch, step=None, context=None):
    """
    Track multiple metrics in mlflow.

    Args:
        metrics_dict (dict): Dictionary of metrics to track
        epoch (int): Current epoch
        step (int, optional): Current step within the epoch
        context (dict, optional): Additional context for the metrics
    """
    context = context or {"subset": "train"}

    for name, value in metrics_dict.items():
        if step is not None:
            mlflow.log_metric(name, value, step=step)
        else:
            mlflow.log_metric(name, value, step=epoch)


def calculate_metrics(all_targets, all_predictions):
    """
    all_targets      - list of (C,) one-hot np.float32
    all_predictions  - list of (C,) sigmoid scores np.float32
    """
    y_true  = np.vstack(all_targets).astype(np.int8)     # (N, C)
    y_score = np.vstack(all_predictions)                 # (N, C) ∈[0,1]

    metrics = {}

    # Average precision (macro over classes that have ≥1 positive)
    aps = []
    for c in range(y_true.shape[1]):
        if y_true[:, c].sum() == 0:      # skip empty class
            continue
        aps.append(average_precision_score(y_true[:, c], y_score[:, c]))
    metrics["avg_precision"] = float(np.mean(aps)) if aps else 0.0

    # ROC AUC (macro over valid classes)
    aucs = []
    for c in range(y_true.shape[1]):
        pos = y_true[:, c].sum()
        neg = (1 - y_true[:, c]).sum()
        if pos == 0 or neg == 0:
            continue                     # undefined
        fpr, tpr, _ = roc_curve(y_true[:, c], y_score[:, c])
        aucs.append(auc(fpr, tpr))
    metrics["roc_auc"] = float(np.mean(aucs)) if aucs else 0.0

    return metrics


def train_epoch(model, loader, optimizer, criterion, device, epoch):
    model.train()
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    for i, (images, targets) in enumerate(loader):
        images  = images.to(device)
        targets = targets.to(device)           # int labels, NOT one-hot

        optimizer.zero_grad()
        logits = model(images)
        loss   = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        # accuracy
        preds = logits.argmax(dim=1)
        running_correct += (preds == targets).sum().item()
        running_total   += targets.size(0)
        running_loss    += loss.item()

        if i % 10 == 9:
            avg_loss = running_loss / 10
            acc_sofar = running_correct / running_total
            logger.info(f"Epoch {epoch+1} | Batch {i+1} | Loss {avg_loss:.4f} | Acc {acc_sofar:.4f}")
            running_loss = 0.0

    # FIX: Calculate and return both loss and accuracy
    epoch_loss = running_loss / len(loader)
    epoch_acc = running_correct / running_total
    return epoch_loss, epoch_acc


In [45]:
# main train logic
from mlflow.models import infer_signature

def train():
    # Hyperparameters
    batch_size    = 64
    num_epochs    = 10
    learning_rate = 1e-4

    # Setup
    device = select_device()
    setup_mlflow(batch_size, learning_rate, num_epochs, device)

    # Data & model
    train_dataset = train_ds
    num_classes   = len(train_dataset.classes)         # CIFAR-100 has 100 classes
    model = MyResNet18(num_classes=num_classes, weights=None).to(device)

    # Loss & optimizer
    criterion = nn.CrossEntropyLoss()                  # single-label classification
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_acc = float("-inf")

    # Training loop
    for epoch in range(num_epochs):
        # 1 train for one epoch → returns (loss, accuracy)
        epoch_loss, epoch_acc = train_epoch(
            model, train_loader, optimizer, criterion, device, epoch
        )

        # 2 log best model to MLflow
        if epoch_acc > best_acc:
            best_acc = epoch_acc

            # package & log model for serving
            sample_in  = torch.randn(1, 3, 224, 224, dtype=torch.float32).to(device)
            signature = infer_signature(
                sample_in.cpu().numpy(),
                model(sample_in).detach().cpu().numpy()
            )
            pip_reqs = [
                f"torch=={torch.__version__}",
                f"torchvision=={torchvision.__version__}",
            ]
            mlflow.pytorch.log_model(
                model,
                artifact_path="best_model",
                signature=signature,
                pip_requirements=pip_reqs
            )

        # 3 log epoch‐level metrics to MLflow
        mlflow.log_metrics({
            "train_loss": epoch_loss,
            "train_accuracy": epoch_acc,
            "learning_rate": optimizer.param_groups[0]["lr"]
        }, step=epoch)

        # 4 console output
        print(f"Epoch {epoch+1}/{num_epochs} — "
              f"Loss: {epoch_loss:.4f}  Acc: {epoch_acc:.4f}")

    mlflow.end_run()


# run with exception handling
try:
    train()
except Exception as e:
    print(f"Train failed: {e}")
    mlflow.end_run(status="FAILED")


2025-05-24 13:46:38,223 - INFO - Using device: mps


Tracking URI set to: http://localhost:5000
1111111


2025-05-24 13:46:45,284 - INFO - Epoch 1 | Batch 10 | Loss 4.8116 | Acc 0.0109
2025-05-24 13:46:45,870 - INFO - Epoch 1 | Batch 20 | Loss 4.7233 | Acc 0.0156
2025-05-24 13:46:46,361 - INFO - Epoch 1 | Batch 30 | Loss 4.6409 | Acc 0.0180
2025-05-24 13:46:46,850 - INFO - Epoch 1 | Batch 40 | Loss 4.4675 | Acc 0.0223
2025-05-24 13:46:47,377 - INFO - Epoch 1 | Batch 50 | Loss 4.4395 | Acc 0.0277
2025-05-24 13:46:47,864 - INFO - Epoch 1 | Batch 60 | Loss 4.3829 | Acc 0.0319
2025-05-24 13:46:48,346 - INFO - Epoch 1 | Batch 70 | Loss 4.3463 | Acc 0.0358
2025-05-24 13:46:48,838 - INFO - Epoch 1 | Batch 80 | Loss 4.2734 | Acc 0.0389
2025-05-24 13:46:49,320 - INFO - Epoch 1 | Batch 90 | Loss 4.2470 | Acc 0.0411
2025-05-24 13:46:49,806 - INFO - Epoch 1 | Batch 100 | Loss 4.2239 | Acc 0.0443
2025-05-24 13:46:50,281 - INFO - Epoch 1 | Batch 110 | Loss 4.1824 | Acc 0.0474
2025-05-24 13:46:50,771 - INFO - Epoch 1 | Batch 120 | Loss 4.1105 | Acc 0.0493
2025-05-24 13:46:51,266 - INFO - Epoch 1 | Batch 

222222
333333
Epoch 1/10 — Loss: 0.0095  Acc: 0.0950
1111111


2025-05-24 13:47:32,968 - INFO - Epoch 2 | Batch 10 | Loss 3.6308 | Acc 0.1555
2025-05-24 13:47:33,458 - INFO - Epoch 2 | Batch 20 | Loss 3.6401 | Acc 0.1516
2025-05-24 13:47:33,944 - INFO - Epoch 2 | Batch 30 | Loss 3.6072 | Acc 0.1529
2025-05-24 13:47:34,429 - INFO - Epoch 2 | Batch 40 | Loss 3.5500 | Acc 0.1545
2025-05-24 13:47:34,914 - INFO - Epoch 2 | Batch 50 | Loss 3.5752 | Acc 0.1573
2025-05-24 13:47:35,400 - INFO - Epoch 2 | Batch 60 | Loss 3.5311 | Acc 0.1612
2025-05-24 13:47:35,887 - INFO - Epoch 2 | Batch 70 | Loss 3.5670 | Acc 0.1622
2025-05-24 13:47:36,363 - INFO - Epoch 2 | Batch 80 | Loss 3.5773 | Acc 0.1635
2025-05-24 13:47:36,838 - INFO - Epoch 2 | Batch 90 | Loss 3.5382 | Acc 0.1626
2025-05-24 13:47:37,315 - INFO - Epoch 2 | Batch 100 | Loss 3.5699 | Acc 0.1622
2025-05-24 13:47:37,795 - INFO - Epoch 2 | Batch 110 | Loss 3.5619 | Acc 0.1615
2025-05-24 13:47:38,277 - INFO - Epoch 2 | Batch 120 | Loss 3.5472 | Acc 0.1608
2025-05-24 13:47:38,751 - INFO - Epoch 2 | Batch 

222222
333333
Epoch 2/10 — Loss: 0.0081  Acc: 0.1727
1111111


2025-05-24 13:48:19,591 - INFO - Epoch 3 | Batch 10 | Loss 3.2889 | Acc 0.2094
2025-05-24 13:48:20,071 - INFO - Epoch 3 | Batch 20 | Loss 3.2828 | Acc 0.2066
2025-05-24 13:48:20,543 - INFO - Epoch 3 | Batch 30 | Loss 3.2937 | Acc 0.2065
2025-05-24 13:48:21,016 - INFO - Epoch 3 | Batch 40 | Loss 3.3794 | Acc 0.2002
2025-05-24 13:48:21,487 - INFO - Epoch 3 | Batch 50 | Loss 3.2745 | Acc 0.2003
2025-05-24 13:48:21,960 - INFO - Epoch 3 | Batch 60 | Loss 3.2656 | Acc 0.2000
2025-05-24 13:48:22,437 - INFO - Epoch 3 | Batch 70 | Loss 3.2412 | Acc 0.2012
2025-05-24 13:48:22,916 - INFO - Epoch 3 | Batch 80 | Loss 3.2238 | Acc 0.2030
2025-05-24 13:48:23,394 - INFO - Epoch 3 | Batch 90 | Loss 3.3182 | Acc 0.2027
2025-05-24 13:48:23,871 - INFO - Epoch 3 | Batch 100 | Loss 3.2979 | Acc 0.2030
2025-05-24 13:48:24,350 - INFO - Epoch 3 | Batch 110 | Loss 3.2632 | Acc 0.2033
2025-05-24 13:48:24,827 - INFO - Epoch 3 | Batch 120 | Loss 3.2037 | Acc 0.2058
2025-05-24 13:48:25,304 - INFO - Epoch 3 | Batch 

222222
333333
Epoch 3/10 — Loss: 0.0082  Acc: 0.2173
1111111


2025-05-24 13:49:06,059 - INFO - Epoch 4 | Batch 10 | Loss 3.0908 | Acc 0.2352
2025-05-24 13:49:06,542 - INFO - Epoch 4 | Batch 20 | Loss 2.9740 | Acc 0.2461
2025-05-24 13:49:07,011 - INFO - Epoch 4 | Batch 30 | Loss 3.0610 | Acc 0.2466
2025-05-24 13:49:07,484 - INFO - Epoch 4 | Batch 40 | Loss 2.9857 | Acc 0.2527
2025-05-24 13:49:07,959 - INFO - Epoch 4 | Batch 50 | Loss 3.0397 | Acc 0.2502
2025-05-24 13:49:08,435 - INFO - Epoch 4 | Batch 60 | Loss 3.1225 | Acc 0.2452
2025-05-24 13:49:08,912 - INFO - Epoch 4 | Batch 70 | Loss 3.1401 | Acc 0.2426
2025-05-24 13:49:09,391 - INFO - Epoch 4 | Batch 80 | Loss 3.0920 | Acc 0.2414
2025-05-24 13:49:09,867 - INFO - Epoch 4 | Batch 90 | Loss 3.1124 | Acc 0.2395
2025-05-24 13:49:10,345 - INFO - Epoch 4 | Batch 100 | Loss 3.0549 | Acc 0.2393
2025-05-24 13:49:10,824 - INFO - Epoch 4 | Batch 110 | Loss 3.0552 | Acc 0.2405
2025-05-24 13:49:11,300 - INFO - Epoch 4 | Batch 120 | Loss 3.0442 | Acc 0.2419
2025-05-24 13:49:11,781 - INFO - Epoch 4 | Batch 

222222
333333
Epoch 4/10 — Loss: 0.0068  Acc: 0.2499
1111111


2025-05-24 13:49:52,815 - INFO - Epoch 5 | Batch 10 | Loss 2.9754 | Acc 0.2648
2025-05-24 13:49:53,301 - INFO - Epoch 5 | Batch 20 | Loss 2.9545 | Acc 0.2684
2025-05-24 13:49:53,775 - INFO - Epoch 5 | Batch 30 | Loss 2.9117 | Acc 0.2693
2025-05-24 13:49:54,246 - INFO - Epoch 5 | Batch 40 | Loss 2.8466 | Acc 0.2762
2025-05-24 13:49:54,720 - INFO - Epoch 5 | Batch 50 | Loss 2.9047 | Acc 0.2762
2025-05-24 13:49:55,202 - INFO - Epoch 5 | Batch 60 | Loss 2.7922 | Acc 0.2814
2025-05-24 13:49:55,685 - INFO - Epoch 5 | Batch 70 | Loss 2.8505 | Acc 0.2811
2025-05-24 13:49:56,165 - INFO - Epoch 5 | Batch 80 | Loss 2.8975 | Acc 0.2796
2025-05-24 13:49:56,640 - INFO - Epoch 5 | Batch 90 | Loss 2.8913 | Acc 0.2779
2025-05-24 13:49:57,120 - INFO - Epoch 5 | Batch 100 | Loss 2.9993 | Acc 0.2745
2025-05-24 13:49:57,613 - INFO - Epoch 5 | Batch 110 | Loss 2.9452 | Acc 0.2732
2025-05-24 13:49:58,108 - INFO - Epoch 5 | Batch 120 | Loss 2.9022 | Acc 0.2732
2025-05-24 13:49:58,594 - INFO - Epoch 5 | Batch 

222222
333333
Epoch 5/10 — Loss: 0.0071  Acc: 0.2797
1111111


2025-05-24 13:50:40,184 - INFO - Epoch 6 | Batch 10 | Loss 2.7601 | Acc 0.3125
2025-05-24 13:50:40,696 - INFO - Epoch 6 | Batch 20 | Loss 2.8107 | Acc 0.2969
2025-05-24 13:50:41,206 - INFO - Epoch 6 | Batch 30 | Loss 2.7673 | Acc 0.2979
2025-05-24 13:50:41,688 - INFO - Epoch 6 | Batch 40 | Loss 2.7648 | Acc 0.2963
2025-05-24 13:50:42,177 - INFO - Epoch 6 | Batch 50 | Loss 2.7630 | Acc 0.2998
2025-05-24 13:50:42,673 - INFO - Epoch 6 | Batch 60 | Loss 2.7531 | Acc 0.2987
2025-05-24 13:50:43,168 - INFO - Epoch 6 | Batch 70 | Loss 2.8220 | Acc 0.2971
2025-05-24 13:50:43,645 - INFO - Epoch 6 | Batch 80 | Loss 2.7914 | Acc 0.2943
2025-05-24 13:50:44,121 - INFO - Epoch 6 | Batch 90 | Loss 2.7947 | Acc 0.2951
2025-05-24 13:50:44,600 - INFO - Epoch 6 | Batch 100 | Loss 2.8198 | Acc 0.2952
2025-05-24 13:50:45,078 - INFO - Epoch 6 | Batch 110 | Loss 2.7166 | Acc 0.2963
2025-05-24 13:50:45,560 - INFO - Epoch 6 | Batch 120 | Loss 2.7486 | Acc 0.2957
2025-05-24 13:50:46,047 - INFO - Epoch 6 | Batch 

222222
333333
Epoch 6/10 — Loss: 0.0067  Acc: 0.3030
1111111


2025-05-24 13:51:27,313 - INFO - Epoch 7 | Batch 10 | Loss 2.6563 | Acc 0.3234
2025-05-24 13:51:27,805 - INFO - Epoch 7 | Batch 20 | Loss 2.6811 | Acc 0.3230
2025-05-24 13:51:28,316 - INFO - Epoch 7 | Batch 30 | Loss 2.6091 | Acc 0.3260
2025-05-24 13:51:28,849 - INFO - Epoch 7 | Batch 40 | Loss 2.6560 | Acc 0.3256
2025-05-24 13:51:29,349 - INFO - Epoch 7 | Batch 50 | Loss 2.6672 | Acc 0.3262
2025-05-24 13:51:29,851 - INFO - Epoch 7 | Batch 60 | Loss 2.6502 | Acc 0.3264
2025-05-24 13:51:30,332 - INFO - Epoch 7 | Batch 70 | Loss 2.6236 | Acc 0.3251
2025-05-24 13:51:30,837 - INFO - Epoch 7 | Batch 80 | Loss 2.6539 | Acc 0.3266
2025-05-24 13:51:31,369 - INFO - Epoch 7 | Batch 90 | Loss 2.6754 | Acc 0.3244
2025-05-24 13:51:31,877 - INFO - Epoch 7 | Batch 100 | Loss 2.6795 | Acc 0.3234
2025-05-24 13:51:32,358 - INFO - Epoch 7 | Batch 110 | Loss 2.5610 | Acc 0.3249
2025-05-24 13:51:32,852 - INFO - Epoch 7 | Batch 120 | Loss 2.6546 | Acc 0.3250
2025-05-24 13:51:33,377 - INFO - Epoch 7 | Batch 

222222
333333
Epoch 7/10 — Loss: 0.0071  Acc: 0.3282
1111111


2025-05-24 13:52:15,289 - INFO - Epoch 8 | Batch 10 | Loss 2.4340 | Acc 0.3680
2025-05-24 13:52:15,775 - INFO - Epoch 8 | Batch 20 | Loss 2.5354 | Acc 0.3578
2025-05-24 13:52:16,249 - INFO - Epoch 8 | Batch 30 | Loss 2.4891 | Acc 0.3542
2025-05-24 13:52:16,722 - INFO - Epoch 8 | Batch 40 | Loss 2.5532 | Acc 0.3527
2025-05-24 13:52:17,200 - INFO - Epoch 8 | Batch 50 | Loss 2.5008 | Acc 0.3566
2025-05-24 13:52:17,678 - INFO - Epoch 8 | Batch 60 | Loss 2.5355 | Acc 0.3543
2025-05-24 13:52:18,156 - INFO - Epoch 8 | Batch 70 | Loss 2.5546 | Acc 0.3526
2025-05-24 13:52:18,634 - INFO - Epoch 8 | Batch 80 | Loss 2.5113 | Acc 0.3518
2025-05-24 13:52:19,110 - INFO - Epoch 8 | Batch 90 | Loss 2.5080 | Acc 0.3523
2025-05-24 13:52:19,589 - INFO - Epoch 8 | Batch 100 | Loss 2.5469 | Acc 0.3516
2025-05-24 13:52:20,068 - INFO - Epoch 8 | Batch 110 | Loss 2.4174 | Acc 0.3531
2025-05-24 13:52:20,551 - INFO - Epoch 8 | Batch 120 | Loss 2.5730 | Acc 0.3514
2025-05-24 13:52:21,030 - INFO - Epoch 8 | Batch 

222222
333333
Epoch 8/10 — Loss: 0.0067  Acc: 0.3493
1111111


2025-05-24 13:53:02,093 - INFO - Epoch 9 | Batch 10 | Loss 2.4270 | Acc 0.3742
2025-05-24 13:53:02,580 - INFO - Epoch 9 | Batch 20 | Loss 2.4281 | Acc 0.3770
2025-05-24 13:53:03,055 - INFO - Epoch 9 | Batch 30 | Loss 2.4684 | Acc 0.3682
2025-05-24 13:53:03,532 - INFO - Epoch 9 | Batch 40 | Loss 2.3877 | Acc 0.3650
2025-05-24 13:53:04,009 - INFO - Epoch 9 | Batch 50 | Loss 2.4126 | Acc 0.3658
2025-05-24 13:53:04,485 - INFO - Epoch 9 | Batch 60 | Loss 2.4198 | Acc 0.3693
2025-05-24 13:53:04,961 - INFO - Epoch 9 | Batch 70 | Loss 2.3750 | Acc 0.3681
2025-05-24 13:53:05,438 - INFO - Epoch 9 | Batch 80 | Loss 2.3205 | Acc 0.3693
2025-05-24 13:53:05,919 - INFO - Epoch 9 | Batch 90 | Loss 2.4683 | Acc 0.3694
2025-05-24 13:53:06,396 - INFO - Epoch 9 | Batch 100 | Loss 2.4242 | Acc 0.3693
2025-05-24 13:53:06,873 - INFO - Epoch 9 | Batch 110 | Loss 2.4003 | Acc 0.3699
2025-05-24 13:53:07,352 - INFO - Epoch 9 | Batch 120 | Loss 2.4527 | Acc 0.3690
2025-05-24 13:53:07,831 - INFO - Epoch 9 | Batch 

222222
333333
Epoch 9/10 — Loss: 0.0064  Acc: 0.3679
1111111


2025-05-24 13:53:49,524 - INFO - Epoch 10 | Batch 10 | Loss 2.3219 | Acc 0.4078
2025-05-24 13:53:50,007 - INFO - Epoch 10 | Batch 20 | Loss 2.3248 | Acc 0.4055
2025-05-24 13:53:50,490 - INFO - Epoch 10 | Batch 30 | Loss 2.3249 | Acc 0.3971
2025-05-24 13:53:50,994 - INFO - Epoch 10 | Batch 40 | Loss 2.4272 | Acc 0.3852
2025-05-24 13:53:51,498 - INFO - Epoch 10 | Batch 50 | Loss 2.3819 | Acc 0.3830
2025-05-24 13:53:51,979 - INFO - Epoch 10 | Batch 60 | Loss 2.2933 | Acc 0.3844
2025-05-24 13:53:52,477 - INFO - Epoch 10 | Batch 70 | Loss 2.3822 | Acc 0.3833
2025-05-24 13:53:52,969 - INFO - Epoch 10 | Batch 80 | Loss 2.2764 | Acc 0.3864
2025-05-24 13:53:53,449 - INFO - Epoch 10 | Batch 90 | Loss 2.3862 | Acc 0.3855
2025-05-24 13:53:53,935 - INFO - Epoch 10 | Batch 100 | Loss 2.3058 | Acc 0.3873
2025-05-24 13:53:54,416 - INFO - Epoch 10 | Batch 110 | Loss 2.3383 | Acc 0.3879
2025-05-24 13:53:54,911 - INFO - Epoch 10 | Batch 120 | Loss 2.4018 | Acc 0.3868
2025-05-24 13:53:55,435 - INFO - Epoc

222222
333333
Epoch 10/10 — Loss: 0.0055  Acc: 0.3886
🏃 View run 20250524_134638 at: http://localhost:5000/#/experiments/1/runs/33a739fa31f34f48855b9a1c141e4655
🧪 View experiment at: http://localhost:5000/#/experiments/1


In [None]:
# use to end the failed test, avoid RUNNING but failed task jam the workflow
mlflow.end_run("6ef3867a369b45f3ba0bc1e2e74a4d90")