# Attention-only FC-LoRA on CIFAR-100

In [2]:
import torch
import time
import numpy as np
import json
from data_utils import *
from model_utils import *
from train_utils import *
from eval_utils import *

def main():
    """Run the LoRA fine-tuning pipeline end to end.

    Steps, in order: read ``config.json`` from the current working
    directory, seed the random number generators, build the data loaders,
    initialise the model, compute the adaptive LoRA ranks, apply LoRA, mark
    the trainable parameters, train, evaluate, plot the training curves,
    print a summary and save the model's ``state_dict``.

    Config keys read directly here: ``seed``, ``paths.best_model`` and
    ``paths.final_model``. The whole config dict is also handed to the
    helper functions, which may read further keys.

    Raises:
        FileNotFoundError: if ``config.json`` is missing.
        json.JSONDecodeError: if ``config.json`` is not valid JSON.
        KeyError: if one of the config keys listed above is absent.
    """
    # Local import: the module is only needed here, for seeding.
    import random

    # Load configuration. The encoding is pinned so parsing does not depend
    # on the platform's default locale.
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

    # perf_counter() is monotonic, so the elapsed time reported at the end
    # cannot be skewed by system clock adjustments during a long run.
    start = time.perf_counter()

    # Seed every RNG the pipeline may draw from; Python's own `random` is
    # included so helpers relying on it are reproducible as well.
    seed = config['seed']
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Data
    train_loader, val_loader, test_loader = get_data_loaders(config)

    # Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = initialize_model(device, config)

    # Adaptive LoRA ranks
    adaptive_ranks = compute_adaptive_ranks(model, train_loader, device, config)

    # Apply LoRA
    model = apply_lora(model, adaptive_ranks, device, config)

    # Set trainable parameters
    set_trainable_parameters(model)

    # Train
    best_model_path = config['paths']['best_model']
    train_losses, val_losses, train_accuracies, val_accuracies, best_epoch = train_model(
        model, train_loader, val_loader, test_loader, device, best_model_path, config
    )

    # Evaluate
    metrics = evaluate_model(model, test_loader, val_loader, device, best_model_path, adaptive_ranks)

    # Plot
    plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, config)

    # Print final results
    print(f"Best Model (Epoch {best_epoch}): Test Loss: {metrics['test_loss']:.4f}, "
          f"Test Accuracy: {metrics['test_accuracy']:.2f}%")
    print(f"ECE: {metrics['ece']:.4f}, Scaled ECE: {metrics['scaled_ece']:.4f}, "
          f"Scaled Test Accuracy: {metrics['scaled_test_accuracy']:.2f}%")
    print(f"Class-wise Accuracy: Mean {metrics['class_acc_mean']:.2f}, "
          f"Std {metrics['class_acc_std']:.2f}")
    # NOTE: despite the label, this interval starts before data loading and
    # ends after evaluation and plotting, so it covers the whole pipeline.
    print(f"Total training time: {(time.perf_counter() - start):.2f} seconds")

    # Save the weights of `model` as it stands at this point. This is a
    # separate file from `best_model_path` (presumably the best checkpoint
    # written during training - confirm in train_model).
    torch.save(model.state_dict(), config['paths']['final_model'])

# Run the pipeline only when this code is executed directly (as a script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Files already downloaded and verified
Files already downloaded and verified


Some weights of DeiTForImageClassification were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                      


Adaptive Ranks (Attention Only): {'deit.encoder.layer.0.attention.attention.value': 16, 'deit.encoder.layer.11.attention.attention.value': 16, 'deit.encoder.layer.9.attention.attention.value': 16, 'deit.encoder.layer.8.attention.attention.value': 16, 'deit.encoder.layer.4.attention.attention.value': 16, 'deit.encoder.layer.10.attention.attention.value': 16, 'deit.encoder.layer.3.attention.attention.value': 4, 'deit.encoder.layer.7.attention.attention.value': 1, 'deit.encoder.layer.6.attention.attention.value': 1, 'deit.encoder.layer.5.attention.attention.value': 1, 'deit.encoder.layer.2.attention.attention.value': 1, 'deit.encoder.layer.1.attention.attention.value': 1, 'deit.encoder.layer.9.attention.attention.query': 1, 'deit.encoder.layer.8.attention.attention.query': 1, 'deit.encoder.layer.7.attention.attention.query': 1, 'deit.encoder.layer.3.attention.attention.key': 1, 'deit.encoder.layer.5.attention.attention.query': 1, 'deit.encoder.layer.6.attention.attention.query': 1, 'deit

Epoch 1/15: 100%|██████████| 1250/1250 [02:40<00:00,  7.81it/s, Loss=1.3949]


Epoch 1: New best model saved with Val Accuracy: 81.78%
Epoch 1: Train Loss: 2.5448, Val Loss: 1.3646, Train Acc: 54.46%, Val Acc: 81.78%


Epoch 2/15: 100%|██████████| 1250/1250 [02:39<00:00,  7.82it/s, Loss=1.1472]


Epoch 2: New best model saved with Val Accuracy: 85.41%
Epoch 2: Train Loss: 1.7690, Val Loss: 1.2306, Train Acc: 70.70%, Val Acc: 85.41%


Epoch 3/15: 100%|██████████| 1250/1250 [02:39<00:00,  7.81it/s, Loss=1.1179]


Epoch 3: New best model saved with Val Accuracy: 86.92%
Epoch 3: Train Loss: 1.6279, Val Loss: 1.1887, Train Acc: 75.72%, Val Acc: 86.92%


Epoch 4/15: 100%|██████████| 1250/1250 [02:39<00:00,  7.83it/s, Loss=1.3021]


Epoch 4: New best model saved with Val Accuracy: 87.48%
Epoch 4: Train Loss: 1.5484, Val Loss: 1.1699, Train Acc: 76.92%, Val Acc: 87.48%


Epoch 5/15: 100%|██████████| 1250/1250 [02:39<00:00,  7.83it/s, Loss=1.8479]


Epoch 5: Test Loss: 1.1683, Test Accuracy: 87.67%
Epoch 5: New best model saved with Val Accuracy: 87.85%
Epoch 5: Train Loss: 1.5359, Val Loss: 1.1583, Train Acc: 76.84%, Val Acc: 87.85%


Epoch 6/15: 100%|██████████| 1250/1250 [02:40<00:00,  7.80it/s, Loss=0.9426]


Epoch 6: New best model saved with Val Accuracy: 88.62%
Epoch 6: Train Loss: 1.4945, Val Loss: 1.1408, Train Acc: 78.06%, Val Acc: 88.62%


Epoch 7/15: 100%|██████████| 1250/1250 [02:40<00:00,  7.80it/s, Loss=1.0584]


Epoch 7: New best model saved with Val Accuracy: 89.24%
Epoch 7: Train Loss: 1.4539, Val Loss: 1.1348, Train Acc: 80.16%, Val Acc: 89.24%


Epoch 8/15: 100%|██████████| 1250/1250 [02:40<00:00,  7.81it/s, Loss=1.5910]


Epoch 8: New best model saved with Val Accuracy: 89.41%
Epoch 8: Train Loss: 1.4338, Val Loss: 1.1198, Train Acc: 82.03%, Val Acc: 89.41%


Epoch 9/15: 100%|██████████| 1250/1250 [02:38<00:00,  7.88it/s, Loss=0.9163]


Epoch 9: New best model saved with Val Accuracy: 89.57%
Epoch 9: Train Loss: 1.3901, Val Loss: 1.1211, Train Acc: 82.20%, Val Acc: 89.57%


Epoch 10/15: 100%|██████████| 1250/1250 [02:38<00:00,  7.90it/s, Loss=0.8910]


Epoch 10: Test Loss: 1.1344, Test Accuracy: 89.01%
Epoch 10: New best model saved with Val Accuracy: 89.75%
Epoch 10: Train Loss: 1.3707, Val Loss: 1.1189, Train Acc: 83.08%, Val Acc: 89.75%


Epoch 11/15: 100%|██████████| 1250/1250 [02:39<00:00,  7.83it/s, Loss=1.0707]


Epoch 11: New best model saved with Val Accuracy: 89.84%
Epoch 11: Train Loss: 1.3743, Val Loss: 1.1183, Train Acc: 83.44%, Val Acc: 89.84%


Epoch 12/15: 100%|██████████| 1250/1250 [02:39<00:00,  7.82it/s, Loss=2.0098]


Epoch 12: Train Loss: 1.3553, Val Loss: 1.1176, Train Acc: 83.68%, Val Acc: 89.75%


Epoch 13/15: 100%|██████████| 1250/1250 [02:37<00:00,  7.92it/s, Loss=2.2069]


Epoch 13: Train Loss: 1.3275, Val Loss: 1.1178, Train Acc: 84.87%, Val Acc: 89.73%


Epoch 14/15: 100%|██████████| 1250/1250 [02:36<00:00,  7.99it/s, Loss=0.8883]


Epoch 14: Train Loss: 1.3095, Val Loss: 1.1143, Train Acc: 85.83%, Val Acc: 89.78%


Epoch 15/15: 100%|██████████| 1250/1250 [02:36<00:00,  7.97it/s, Loss=1.8669]


Epoch 15: Test Loss: 1.1284, Test Accuracy: 89.19%
Epoch 15: New best model saved with Val Accuracy: 89.97%
Epoch 15: Train Loss: 1.3260, Val Loss: 1.1146, Train Acc: 84.63%, Val Acc: 89.97%


Some weights of DeiTForImageClassification were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Best Model (Epoch 15): Test Loss: 1.1284, Test Accuracy: 89.19%
ECE: 0.0920, Scaled ECE: 0.0146, Scaled Test Accuracy: 89.19%
Class-wise Accuracy: Mean 0.89, Std 0.08
Total training time: 2862.12 seconds
