In [12]:
# Install required dependencies
import subprocess
import sys

# Make sure the third-party packages used by this notebook are available.
# Each entry serves both as the pip distribution name and (with '-' mapped to
# '_') as the name of the importable top-level module.
import importlib.util

required_packages = ['ftfy', 'pycocotools', 'fvcore', 'h5py', 'pybase64', 'tqdm', 'torchtext']

print("Installing required packages...")
for package in required_packages:
    module_name = package.replace('-', '_')

    # find_spec only locates the module; it does not execute the package.
    # An installed package whose import fails for an unrelated reason therefore
    # neither crashes this cell nor gets reinstalled by mistake.
    if importlib.util.find_spec(module_name) is not None:
        print(f"✓ {package} already installed")
        continue

    print(f"Installing {package}...")
    # sys.executable is the interpreter running this kernel, so pip installs
    # into the environment the notebook is actually using.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', package, '-q'])
    # Let the import system notice the files that pip just wrote.
    importlib.invalidate_caches()
    print(f"✓ {package} installed")

print("\nAll dependencies installed successfully!")

# Add the ml-cvnets repository root to Python path so that the `cvnets` and
# `options` packages imported further down can be found.
# The location defaults to the original checkout path; set the CVNETS_ROOT
# environment variable to point at the checkout on another machine.
import os
notebook_dir = os.environ.get('CVNETS_ROOT') or r'd:\Git_repos\other-repos\ml-cvnets'

if not os.path.isdir(notebook_dir):
    # Warn right away: otherwise the problem only surfaces later as an import error.
    print(f"Warning: {notebook_dir} does not exist; set CVNETS_ROOT to the ml-cvnets checkout")

# Guard against duplicate entries when the cell is re-run.
if notebook_dir not in sys.path:
    sys.path.insert(0, notebook_dir)

print(f"\nAdded {notebook_dir} to Python path")

Installing required packages...
✓ ftfy already installed
✓ pycocotools already installed
✓ fvcore already installed
✓ h5py already installed
✓ pybase64 already installed
✓ tqdm already installed
Installing torchtext...
✓ torchtext installed

All dependencies installed successfully!

Added d:\Git_repos\other-repos\ml-cvnets to Python path
✓ torchtext installed

All dependencies installed successfully!

Added d:\Git_repos\other-repos\ml-cvnets to Python path


In [6]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import argparse
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

# Import from ml-cvnets repository
from cvnets import get_model
from options.opts import get_training_arguments

# Create argument namespace for model configuration
def create_opts_for_mobilevit(num_classes=10, mode='xx_small'):
    """Build the option namespace that is handed to ``get_model`` for MobileViT.

    Starts from the defaults of the ml-cvnets training-argument parser (parsed
    with an empty argument list) and then applies the overrides listed below.

    Args:
        num_classes: Value stored under ``model.classification.n_classes``.
        mode: Value stored under ``model.classification.mit.mode``
            (xx_small, x_small, or small).

    Returns:
        The parsed namespace with all overrides applied. The keys contain
        dots, so they must be read back with ``getattr(opts, 'a.b.c')``.
    """
    arg_parser = get_training_arguments(parse_args=False)
    opts = arg_parser.parse_args([])

    overrides = {
        # Basic settings
        'common.config_file': None,
        'common.results_loc': './results',
        'common.override_kwargs': None,
        # Dataset settings
        'dataset.category': 'classification',
        'dataset.name': 'cifar10',
        # Model settings
        'model.classification.name': 'mobilevit',
        'model.classification.n_classes': num_classes,
        'model.classification.classifier_dropout': 0.1,
        'model.classification.mit.mode': mode,
        'model.classification.mit.head_dim': None,
        'model.classification.mit.number_heads': 4,
        'model.classification.mit.attn_dropout': 0.0,
        'model.classification.mit.ffn_dropout': 0.0,
        'model.classification.mit.dropout': 0.1,
        'model.classification.mit.no_fuse_local_global_features': False,
        'model.classification.mit.conv_kernel_size': 3,
        # Activation and normalization
        'model.classification.activation.name': 'swish',
        'model.normalization.name': 'batch_norm',
        'model.normalization.momentum': 0.1,
        'model.activation.name': 'swish',
        # Layer settings
        'model.layer.global_pool': 'mean',
        'model.layer.conv_init': 'kaiming_normal',
        'model.layer.linear_init': 'trunc_normal',
        'model.layer.linear_init_std_dev': 0.02,
    }
    for option_name, option_value in overrides.items():
        setattr(opts, option_name, option_value)

    return opts


banner_rule = "=" * 70
print("Setting up MobileViT for CIFAR-10 training...")
print(banner_rule)

Setting up MobileViT for CIFAR-10 training...


In [7]:
# Hyperparameters for the CIFAR-10 run.
# The smallest variant (xx_small) keeps training fast on a home setup;
# 'x_small' or 'small' can be tried for better accuracy at the cost of speed.
MODEL_MODE = 'xx_small'  # Options: 'xx_small', 'x_small', 'small'
NUM_EPOCHS = 100  # Paper uses 300 for ImageNet, but 100-200 is common for CIFAR-10
BATCH_SIZE = 128  # Adjust based on your GPU memory
LEARNING_RATE = 0.002  # Peak value; warmup and the cosine schedule scale it
WEIGHT_DECAY = 0.01
NUM_CLASSES = 10  # CIFAR-10 has 10 classes

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')
print(f"Using device: {device}")
if use_cuda:
    # Report name and total memory (in units of 1e9 bytes) of device 0.
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_total_bytes / 1e9:.2f} GB")

Using device: cuda
GPU: NVIDIA GeForce RTX 4090
GPU Memory: 25.76 GB


In [10]:
# Input pipelines for CIFAR-10: augmentation for training, plain
# tensor conversion + normalization for testing.
print("Preparing data loaders...")

# Per-channel mean / std used to normalize the images
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, followed by tensor conversion and normalization.
augmentation_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
]
train_transform = transforms.Compose(
    augmentation_steps
    + [transforms.ToTensor(), transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)]
)

# Test pipeline: no augmentation, only tensor conversion and normalization.
test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)]
)

# Fetch (or reuse) CIFAR-10, retrying a few times because the download can
# fail transiently.
import time
max_retries = 3
retry_delay = 2

for attempt in range(max_retries):
    try:
        print(f"Attempting to load CIFAR-10 dataset (attempt {attempt + 1}/{max_retries})...")

        # Both splits live under the same root and are downloaded on demand.
        cifar_kwargs = dict(root='./data', download=True)
        train_dataset = torchvision.datasets.CIFAR10(
            train=True, transform=train_transform, **cifar_kwargs
        )
        test_dataset = torchvision.datasets.CIFAR10(
            train=False, transform=test_transform, **cifar_kwargs
        )
    except Exception as e:
        attempts_left = max_retries - 1 - attempt
        if attempts_left == 0:
            # Out of retries: tell the user how to fetch the data by hand,
            # then let the original error propagate.
            print(f"\n✗ Failed to download CIFAR-10 after {max_retries} attempts.")
            print("Please manually download CIFAR-10 from:")
            print("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
            print("Extract it to ./data/cifar-10-batches-py/")
            raise
        print(f"Download failed: {e}")
        print(f"Retrying in {retry_delay} seconds...")
        time.sleep(retry_delay)
    else:
        print("✓ Dataset loaded successfully!")
        break

# Batch iterators over the two splits. Only the training split is shuffled;
# everything else is shared between the loaders.
shared_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)

train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, **shared_loader_kwargs
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, **shared_loader_kwargs
)

# Summarize sample and batch counts for both splits.
print("\nDataset Statistics:")
for stat_label, sized_object in (
    ("Training samples", train_dataset),
    ("Test samples", test_dataset),
    ("Training batches", train_loader),
    ("Test batches", test_loader),
):
    print(f"  {stat_label}: {len(sized_object)}")

Preparing data loaders...
Attempting to load CIFAR-10 dataset (attempt 1/3)...
Download failed: <urlopen error [WinError 10061] No connection could be made because the target machine actively refused it>
Retrying in 2 seconds...
Download failed: <urlopen error [WinError 10061] No connection could be made because the target machine actively refused it>
Retrying in 2 seconds...
Attempting to load CIFAR-10 dataset (attempt 2/3)...
Attempting to load CIFAR-10 dataset (attempt 2/3)...
Download failed: <urlopen error [WinError 10061] No connection could be made because the target machine actively refused it>
Retrying in 2 seconds...
Download failed: <urlopen error [WinError 10061] No connection could be made because the target machine actively refused it>
Retrying in 2 seconds...
Attempting to load CIFAR-10 dataset (attempt 3/3)...
Attempting to load CIFAR-10 dataset (attempt 3/3)...
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz
Download

100%|████████████████████████████████████████████████████████████████| 170498071/170498071 [15:46<00:00, 180101.85it/s]



Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified
✓ Dataset loaded successfully!

Dataset Statistics:
  Training samples: 50000
  Test samples: 10000
  Training batches: 391
  Test batches: 79
✓ Dataset loaded successfully!

Dataset Statistics:
  Training samples: 50000
  Test samples: 10000
  Training batches: 391
  Test batches: 79


In [13]:
# Create MobileViT model using the repository's infrastructure
print("\nInitializing MobileViT model...")
opts = create_opts_for_mobilevit(num_classes=NUM_CLASSES, mode=MODEL_MODE)

try:
    model = get_model(opts)
    model = model.to(device)

    # Parameter counts; total_params is reused by the results summary cell.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"Model: MobileViT-{MODEL_MODE}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    # 4 bytes per parameter in FP32
    print(f"Model size: ~{total_params * 4 / 1e6:.2f} MB (FP32)")

    # Smoke-test the forward pass on a dummy CIFAR-sized batch.
    # Run it in eval mode: in training mode the batch-norm layers would fold
    # the statistics of this random batch into their running estimates (and
    # dropout would be active), so the model would no longer be freshly
    # initialized when training starts.
    dummy_input = torch.randn(2, 3, 32, 32).to(device)
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            dummy_output = model(dummy_input)
    finally:
        # Restore whatever mode the model was in before the smoke test.
        model.train(was_training)
    print(f"Output shape: {dummy_output.shape}")
    print("✓ Model created successfully!")

except Exception as e:
    # Report the failure in the cell output, then surface the full traceback.
    print(f"Error creating model: {e}")
    raise


Initializing MobileViT model...
2025-12-07 15:58:24 - [93m[1mDEBUG   [0m - Cannot load internal arguments, skipping.
2025-12-07 15:58:35 - [32m[1mINFO   [0m - Trainable parameters: ['conv_1.block.conv.weight', 'conv_1.block.norm.weight', 'conv_1.block.norm.bias', 'layer_1.0.block.exp_1x1.block.conv.weight', 'layer_1.0.block.exp_1x1.block.norm.weight', 'layer_1.0.block.exp_1x1.block.norm.bias', 'layer_1.0.block.conv_3x3.block.conv.weight', 'layer_1.0.block.conv_3x3.block.norm.weight', 'layer_1.0.block.conv_3x3.block.norm.bias', 'layer_1.0.block.red_1x1.block.conv.weight', 'layer_1.0.block.red_1x1.block.norm.weight', 'layer_1.0.block.red_1x1.block.norm.bias', 'layer_2.0.block.exp_1x1.block.conv.weight', 'layer_2.0.block.exp_1x1.block.norm.weight', 'layer_2.0.block.exp_1x1.block.norm.bias', 'layer_2.0.block.conv_3x3.block.conv.weight', 'layer_2.0.block.conv_3x3.block.norm.weight', 'layer_2.0.block.conv_3x3.block.norm.bias', 'layer_2.0.block.red_1x1.block.conv.weight', 'layer_2.0.bl

In [14]:
# Loss, optimizer and the step counts that drive the learning-rate schedule.
# Follows the paper's training recipe (AdamW with cosine schedule).
print("\nSetting up training components...")

# Cross entropy with label smoothing (as in the paper)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# AdamW (as in the paper), applied to every model parameter
adamw_settings = dict(lr=LEARNING_RATE, betas=(0.9, 0.999), weight_decay=WEIGHT_DECAY)
optimizer = torch.optim.AdamW(model.parameters(), **adamw_settings)

# Warmup + cosine schedule horizon, expressed in optimizer steps (batches).
# Paper uses 20k warmup iterations, we'll use 5 epochs for CIFAR-10.
warmup_epochs = 5
steps_per_epoch = len(train_loader)
total_steps = steps_per_epoch * NUM_EPOCHS
warmup_steps = steps_per_epoch * warmup_epochs

def get_lr(step):
    """Return the learning-rate multiplier for scheduler step ``step``.

    The multiplier rises linearly from 0 to 1 over the first ``warmup_steps``
    steps, then follows half a cosine from 1 down to 0 until ``total_steps``.
    Both counts are read from module-level variables.

    Args:
        step: Zero-based number of scheduler steps taken so far.

    Returns:
        A plain Python ``float`` in ``[0, 1]``. Returning a float (rather than
        a tensor) keeps the optimizer's ``lr`` entry a float as well.
    """
    import math

    if step < warmup_steps:
        # Linear warmup
        return step / warmup_steps

    decay_steps = total_steps - warmup_steps
    if decay_steps <= 0:
        # No room for a decay phase: hold the base learning rate.
        return 1.0

    # Cosine annealing; clamp so stepping past total_steps keeps the rate at
    # its floor instead of letting the cosine climb back up.
    progress = min(1.0, (step - warmup_steps) / decay_steps)
    return 0.5 * (1.0 + math.cos(math.pi * progress))

# LambdaLR scales the optimizer's base learning rate by the value get_lr returns.
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=get_lr)

# One-line summary of each training component.
for summary_line in (
    f"Optimizer: AdamW (lr={LEARNING_RATE}, weight_decay={WEIGHT_DECAY})",
    f"Scheduler: Cosine annealing with {warmup_epochs} epoch warmup",
    "Loss: CrossEntropyLoss with label smoothing 0.1",
):
    print(summary_line)


Setting up training components...
Optimizer: AdamW (lr=0.002, weight_decay=0.01)
Scheduler: Cosine annealing with 5 epoch warmup
Loss: CrossEntropyLoss with label smoothing 0.1


In [15]:
# Training and evaluation functions
def train_epoch(model, train_loader, criterion, optimizer, scheduler, device, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    The scheduler is stepped right after every optimizer step, i.e. once per
    batch rather than once per epoch.

    Args:
        model: Network to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
            Assumed to return the mean loss over the batch (the default
            reduction); the per-sample average below relies on that.
        optimizer: Optimizer that updates the model parameters.
        scheduler: Learning-rate scheduler, stepped once per batch.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used only for the progress-bar label.

    Returns:
        ``(epoch_loss, epoch_acc)``: the per-sample average loss and the
        accuracy in percent over the epoch. Both are ``0.0`` when the loader
        yields no samples.
    """
    model.train()
    loss_sum = 0.0  # sum of per-sample losses seen so far
    correct = 0
    total = 0

    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}')
    for batch_idx, (inputs, targets) in enumerate(pbar):
        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = targets.size(0)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Statistics. Weight the batch-mean loss by the batch size so that a
        # smaller final batch does not count as much as a full one.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(targets).sum().item()

        # Update progress bar (every 10th batch)
        if batch_idx % 10 == 0:
            current_lr = optimizer.param_groups[0]['lr']
            seen = max(total, 1)
            pbar.set_postfix({
                'loss': f'{loss_sum/seen:.3f}',
                'acc': f'{100.*correct/seen:.2f}%',
                'lr': f'{current_lr:.6f}'
            })

    # max(..., 1) turns an empty loader into zeros instead of a ZeroDivisionError.
    seen = max(total, 1)
    epoch_loss = loss_sum / seen
    epoch_acc = 100. * correct / seen
    return epoch_loss, epoch_acc


def evaluate(model, test_loader, criterion, device):
    """Compute loss and accuracy of ``model`` on ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled for
    the duration of the pass.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
            Assumed to return the mean loss over the batch (the default
            reduction); the per-sample average below relies on that.
        device: Device the batches are moved to.

    Returns:
        ``(test_loss, test_acc)``: the per-sample average loss and the
        accuracy in percent. Both are ``0.0`` when the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0  # sum of per-sample losses
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in tqdm(test_loader, desc='Evaluating', leave=False):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch does not count as much as a full one.
            batch_size = targets.size(0)
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(targets).sum().item()

    # max(..., 1) turns an empty loader into zeros instead of a ZeroDivisionError.
    seen = max(total, 1)
    test_loss = loss_sum / seen
    test_acc = 100. * correct / seen
    return test_loss, test_acc

print("Training and evaluation functions defined.")

Training and evaluation functions defined.


In [None]:
# Training loop with checkpointing
print("\n" + "="*70)
print("Starting Training")
print("="*70)

# Best-model and periodic checkpoints are written to this directory
os.makedirs('./checkpoints', exist_ok=True)

# Training history. Metrics are recorded only on evaluation epochs, so every
# list holds one entry per evaluation rather than one per epoch.
history = {
    'train_loss': [],
    'train_acc': [],
    'test_loss': [],
    'test_acc': [],
    'lr': []
}

best_acc = 0.0
best_epoch = 0  # 1-based epoch number of the best evaluation so far

try:
    for epoch in range(NUM_EPOCHS):
        # Train
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, scheduler, device, epoch
        )

        # Evaluate every 5 epochs or at the last epoch
        if (epoch + 1) % 5 == 0 or epoch == NUM_EPOCHS - 1:
            test_loss, test_acc = evaluate(model, test_loader, criterion, device)

            # The optimizer's lr can be a 0-dim tensor when the schedule
            # lambda returns one; store a plain float so the history stays
            # easy to plot and to serialize.
            current_lr = float(optimizer.param_groups[0]['lr'])

            # Save history
            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            history['test_loss'].append(test_loss)
            history['test_acc'].append(test_acc)
            history['lr'].append(current_lr)

            print(f'\nEpoch {epoch+1}/{NUM_EPOCHS}:')
            print(f'  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%')
            print(f'  Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%')
            print(f'  Learning Rate: {current_lr:.6f}')

            # Save best model (the stored 'epoch' is the zero-based index)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = epoch + 1
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_acc': best_acc,
                    'history': history,
                }, './checkpoints/mobilevit_cifar10_best.pth')
                print(f'  ✓ Best model saved! (Acc: {best_acc:.2f}%)')

        # Save checkpoint every 20 epochs
        if (epoch + 1) % 20 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'history': history,
            }, f'./checkpoints/mobilevit_cifar10_epoch_{epoch+1}.pth')
            print(f'  Checkpoint saved at epoch {epoch+1}')

    print("\n" + "="*70)
    print("Training Completed!")
    print(f"Best Test Accuracy: {best_acc:.2f}% at epoch {best_epoch}")
    print("="*70)

except KeyboardInterrupt:
    # Stopping the cell by hand is expected; report progress instead of a traceback.
    print("\n\nTraining interrupted by user.")
    print(f"Best accuracy so far: {best_acc:.2f}% at epoch {best_epoch}")
except Exception as e:
    # Report the failure in the cell output, then surface the full traceback.
    print(f"\n\nError during training: {e}")
    raise


Starting Training


Epoch 1/100: 100%|██████████████████████████████| 391/391 [00:28<00:00, 13.93it/s, loss=2.294, acc=16.92%, lr=0.000400]
Epoch 1/100: 100%|██████████████████████████████| 391/391 [00:28<00:00, 13.93it/s, loss=2.294, acc=16.92%, lr=0.000400]
Epoch 2/100: 100%|██████████████████████████████| 391/391 [00:24<00:00, 15.89it/s, loss=1.929, acc=32.09%, lr=0.000800]
Epoch 2/100: 100%|██████████████████████████████| 391/391 [00:24<00:00, 15.89it/s, loss=1.929, acc=32.09%, lr=0.000800]
Epoch 3/100: 100%|██████████████████████████████| 391/391 [00:24<00:00, 15.91it/s, loss=1.745, acc=41.41%, lr=0.001200]
Epoch 3/100: 100%|██████████████████████████████| 391/391 [00:24<00:00, 15.91it/s, loss=1.745, acc=41.41%, lr=0.001200]
Epoch 4/100: 100%|██████████████████████████████| 391/391 [00:27<00:00, 14.02it/s, loss=1.641, acc=46.91%, lr=0.001600]
Epoch 4/100: 100%|██████████████████████████████| 391/391 [00:27<00:00, 14.02it/s, loss=1.641, acc=46.91%, lr=0.001600]
Epoch 5/100: 100%|██████████████████████


Epoch 5/100:
  Train Loss: 1.5643 | Train Acc: 50.89%
  Test Loss: 1.4564 | Test Acc: 56.37%
  Learning Rate: 0.002000
  ✓ Best model saved! (Acc: 56.37%)


Epoch 6/100: 100%|██████████████████████████████| 391/391 [00:44<00:00,  8.70it/s, loss=1.461, acc=56.32%, lr=0.001999]
Epoch 6/100: 100%|██████████████████████████████| 391/391 [00:44<00:00,  8.70it/s, loss=1.461, acc=56.32%, lr=0.001999]
Epoch 7/100: 100%|██████████████████████████████| 391/391 [00:45<00:00,  8.53it/s, loss=1.381, acc=59.85%, lr=0.001998]
Epoch 7/100: 100%|██████████████████████████████| 391/391 [00:45<00:00,  8.53it/s, loss=1.381, acc=59.85%, lr=0.001998]
Epoch 8/100: 100%|██████████████████████████████| 391/391 [00:42<00:00,  9.11it/s, loss=1.327, acc=62.42%, lr=0.001995]
Epoch 8/100: 100%|██████████████████████████████| 391/391 [00:42<00:00,  9.11it/s, loss=1.327, acc=62.42%, lr=0.001995]
Epoch 9/100: 100%|██████████████████████████████| 391/391 [00:44<00:00,  8.71it/s, loss=1.281, acc=64.33%, lr=0.001991]
Epoch 9/100: 100%|██████████████████████████████| 391/391 [00:44<00:00,  8.71it/s, loss=1.281, acc=64.33%, lr=0.001991]
Epoch 10/100: 100%|█████████████████████


Epoch 10/100:
  Train Loss: 1.2480 | Train Acc: 66.13%
  Test Loss: 1.1896 | Test Acc: 68.85%
  Learning Rate: 0.001986
  ✓ Best model saved! (Acc: 68.85%)


Epoch 11/100: 100%|█████████████████████████████| 391/391 [00:41<00:00,  9.48it/s, loss=1.210, acc=67.99%, lr=0.001980]
Epoch 11/100: 100%|█████████████████████████████| 391/391 [00:41<00:00,  9.48it/s, loss=1.210, acc=67.99%, lr=0.001980]
Epoch 12/100: 100%|█████████████████████████████| 391/391 [00:41<00:00,  9.52it/s, loss=1.177, acc=69.63%, lr=0.001973]
Epoch 12/100: 100%|█████████████████████████████| 391/391 [00:41<00:00,  9.52it/s, loss=1.177, acc=69.63%, lr=0.001973]
Epoch 13/100: 100%|█████████████████████████████| 391/391 [00:40<00:00,  9.70it/s, loss=1.146, acc=71.09%, lr=0.001965]
Epoch 13/100: 100%|█████████████████████████████| 391/391 [00:40<00:00,  9.70it/s, loss=1.146, acc=71.09%, lr=0.001965]
Epoch 14/100: 100%|█████████████████████████████| 391/391 [00:40<00:00,  9.56it/s, loss=1.124, acc=72.05%, lr=0.001956]
Epoch 14/100: 100%|█████████████████████████████| 391/391 [00:40<00:00,  9.56it/s, loss=1.124, acc=72.05%, lr=0.001956]
Epoch 15/100: 100%|█████████████████████


Epoch 15/100:
  Train Loss: 1.1030 | Train Acc: 73.15%
  Test Loss: 1.1083 | Test Acc: 72.79%
  Learning Rate: 0.001946
  ✓ Best model saved! (Acc: 72.79%)


Epoch 16/100: 100%|█████████████████████████████| 391/391 [00:40<00:00,  9.57it/s, loss=1.081, acc=74.06%, lr=0.001935]
Epoch 16/100: 100%|█████████████████████████████| 391/391 [00:40<00:00,  9.57it/s, loss=1.081, acc=74.06%, lr=0.001935]
Epoch 17/100:  56%|████████████████▏            | 218/391 [00:26<00:15, 11.45it/s, loss=1.061, acc=75.44%, lr=0.001928]

In [None]:
# Visualize the recorded metrics (one point per evaluation) and print a summary.
if not len(history['train_loss']) > 0:
    print("No training history to plot.")
else:
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    # The loss and accuracy panels share the same layout: a train curve and a
    # test curve with a legend and a grid.
    paired_panels = (
        (axes[0], 'train_loss', 'test_loss', 'Train Loss', 'Test Loss',
         'Loss', 'Training and Test Loss'),
        (axes[1], 'train_acc', 'test_acc', 'Train Acc', 'Test Acc',
         'Accuracy (%)', 'Training and Test Accuracy'),
    )
    for panel, train_key, test_key, train_label, test_label, y_label, title in paired_panels:
        panel.plot(history[train_key], label=train_label, marker='o')
        panel.plot(history[test_key], label=test_label, marker='s')
        panel.set_xlabel('Evaluation Step')
        panel.set_ylabel(y_label)
        panel.set_title(title)
        panel.legend()
        panel.grid(True)

    # Learning-rate panel: a single curve on a logarithmic y-axis.
    lr_panel = axes[2]
    lr_panel.plot(history['lr'], marker='o', color='orange')
    lr_panel.set_xlabel('Evaluation Step')
    lr_panel.set_ylabel('Learning Rate')
    lr_panel.set_title('Learning Rate Schedule')
    lr_panel.grid(True)
    lr_panel.set_yscale('log')

    plt.tight_layout()
    plt.savefig('./checkpoints/training_history.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Text summary; "final" refers to the most recent evaluation.
    final_train_acc = history['train_acc'][-1]
    final_test_acc = history['test_acc'][-1]
    print(f"\nFinal Results:")
    print(f"  Best Test Accuracy: {best_acc:.2f}%")
    print(f"  Final Train Accuracy: {final_train_acc:.2f}%")
    print(f"  Final Test Accuracy: {final_test_acc:.2f}%")
    print(f"\nModel: MobileViT-{MODEL_MODE}")
    print(f"Parameters: {total_params:,}")
    print(f"Training completed in {NUM_EPOCHS} epochs")