# Offline Learning Resources for Comprehensive Project

## Load libraries and configure environment

In [6]:
%load_ext autoreload
%autoreload 2

import yaml
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from data.imagenet_loader import ContinualImageNet
from models.backbone import get_backbone
from strategies.joint_training import JointTrainer
from utils.metrics import CLMetrics


# Read the experiment configuration from the YAML file next to the notebook.
with open('config.yaml', 'r') as config_file:
    cfg = yaml.safe_load(config_file)

# Use the device named in the config only when CUDA is usable; otherwise run on
# the CPU. The config entry is looked up only in the CUDA branch.
if torch.cuda.is_available():
    device = torch.device(cfg['experiment']['device'])
else:
    device = torch.device("cpu")

print("Config loaded successfully.")
print(f"Device: {device}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Config loaded successfully.
Device: cuda


## Load dataset

In [3]:
# Set up the data source, metric tracker, model and trainer from the config.
cl_data = ContinualImageNet(
    root_dir=cfg['data']['root_dir'],
    num_tasks=cfg['data']['num_tasks'],
    total_classes=cfg['data']['total_classes'],
    batch_size=cfg['training']['batch_size'],
    num_workers=cfg['data']['num_workers']
)

cl_metrics = CLMetrics(cfg['data']['num_tasks'])

# Result log filled by the training loop and read by the plotting code:
#   'task_acc'   -> {task_id: [accuracy after each phase in which it was tested]}
#   'avg_acc'    -> one average-accuracy value per training phase
#   'avg_forget' -> one average-forgetting value per training phase
# The keys must match the ones used by the training loop and by
# plot_advanced_metrics; the previous 'acc' / 'forget' keys did not, which made
# the loop fail with KeyError on its first evaluation.
history = {
    'task_acc': {task: [] for task in range(cfg['data']['num_tasks'])},
    'avg_acc': [],
    'avg_forget': [],
}

model = get_backbone(cfg)
trainer = JointTrainer(model, device, cfg)

FileNotFoundError: [Errno 2] No such file or directory: './data/imagenet256/train'

## Train model with offline learning approach

In [4]:
# Pull the settings used by the training loop out of the config in one place.
num_tasks, epochs, save_dir = (
    cfg['data']['num_tasks'],
    cfg['training']['epochs_per_task'],
    cfg['experiment']['save_dir'],
)

### Training loop

In [None]:
# One phase per task: train on the cumulative data of all tasks seen so far,
# save a checkpoint, then evaluate on each seen task separately.
# NOTE: this cell appends to `history`; re-running it without re-creating
# `history` first will record every phase twice.
for task_id in range(num_tasks):
    print(f"\n{'='*30}")
    print(f"   PHASE {task_id+1} / {num_tasks}: Learning Task {task_id}")
    print(f"{'='*30}")

    # Train on tasks 0..task_id together.
    print(f"Loading cumulative data (Task 0-{task_id})...")
    train_loader = cl_data.get_data_loader(task_id, mode='train', cumulative=True)

    trainer.train(train_loader, epochs=epochs)
    trainer.save_checkpoint(task_id, save_dir)

    # Evaluate on every task seen so far, one task at a time.
    task_accuracies = []
    # setdefault keeps the loop working even if `history` was created without
    # the 'task_acc' / 'avg_acc' / 'avg_forget' entries.
    per_task_history = history.setdefault('task_acc', {})

    for test_task in range(task_id + 1):
        val_loader = cl_data.get_data_loader(test_task, mode='val', cumulative=False)
        acc = trainer.evaluate(val_loader)

        cl_metrics.update(train_phase=task_id, test_task=test_task, accuracy=acc)
        per_task_history.setdefault(test_task, []).append(acc)
        task_accuracies.append(acc)

        print(f"    [Test Task {test_task}]: {acc:.2f}%")

    # Aggregate statistics for this phase.
    avg_acc, avg_forget = cl_metrics.calculate_metrics(task_id)
    history.setdefault('avg_acc', []).append(avg_acc)
    history.setdefault('avg_forget', []).append(avg_forget)

    print(f"\n[Summary phase {task_id}] Avg Acc: {avg_acc:.2f}% | Forget: {avg_forget:.2f}%")




NameError: name 'cl_data' is not defined

## Visualize results

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix

def get_all_predictions(model, dataset, device, num_tasks):
    """
    Collect predictions over the combined validation set (all tasks).

    Args:
        model: Callable network exposing ``eval()``. It is called on each input
            batch and its output is reduced with ``max(1)`` to get predictions.
        dataset: Object exposing
            ``get_data_loader(task_id, mode=..., cumulative=...)``.
        device: Device the input batches are moved to before the forward pass.
        num_tasks: Total number of tasks; the cumulative validation loader of
            the last task (``num_tasks - 1``) is requested.

    Returns:
        ``(y_true, y_pred)`` as NumPy arrays, in loader order. Both are empty
        when the loader yields no batches.
    """
    # Progress bar is optional: fall back to plain iteration when tqdm is not
    # installed. (The original called an undefined name `notebook`.)
    try:
        from tqdm.auto import tqdm
    except ImportError:
        tqdm = None

    model.eval()

    print("Collecting predictions from all tasks for analysis...")

    loader = dataset.get_data_loader(num_tasks - 1, mode='val', cumulative=True)
    # The training loop uses the return value directly as a loader, while this
    # function used to unpack a pair. Accept both shapes.
    # NOTE(review): confirm which one get_data_loader really returns.
    if isinstance(loader, tuple):
        loader = loader[0]

    batches = loader if tqdm is None else tqdm(loader, desc="Inference", leave=False)

    pred_chunks = []
    target_chunks = []
    with torch.no_grad():
        for inputs, labels in batches:
            outputs = model(inputs.to(device))
            _, preds = outputs.max(1)

            # Move to CPU before converting so this also works when the loader
            # yields tensors that are not on the CPU.
            pred_chunks.append(preds.cpu().numpy())
            target_chunks.append(labels.cpu().numpy())

    if not pred_chunks:
        return np.array([], dtype=int), np.array([], dtype=int)

    return np.concatenate(target_chunks), np.concatenate(pred_chunks)

def _task_confusion_percent(cm, num_tasks, classes_per_task):
    """Collapse a class-level confusion matrix into a row-normalised (%) task-level matrix."""
    span = num_tasks * classes_per_task
    # Element [i, a, j, b] of the reshaped view is cm[i*C + a, j*C + b], so
    # summing over axes 1 and 3 adds up each (true task, predicted task) block.
    blocks = cm[:span, :span].reshape(num_tasks, classes_per_task, num_tasks, classes_per_task)
    task_cm = blocks.sum(axis=(1, 3)).astype(float)

    row_sums = task_cm.sum(axis=1, keepdims=True)
    # `where=` without `out=` leaves the skipped entries uninitialised, so
    # start from zeros: a task with no true samples gets an all-zero row.
    task_cm_norm = np.zeros_like(task_cm)
    np.divide(task_cm, row_sums, out=task_cm_norm, where=row_sums != 0)
    return task_cm_norm * 100


def _per_class_accuracy(cm):
    """Return the accuracy (%) of every class that has at least one true sample."""
    class_totals = cm.sum(axis=1)
    has_samples = class_totals > 0
    return cm.diagonal()[has_samples] / class_totals[has_samples] * 100


def plot_advanced_metrics(history, y_true, y_pred, num_tasks, total_classes):
    """
    Draw a three-panel summary figure and return the task-level confusion matrix.

    Panels:
        1. Accuracy of each task across training phases, from ``history['task_acc']``.
        2. Task-level confusion matrix, row-normalised to percentages.
        3. Histogram of per-class accuracy with its mean marked.

    Args:
        history: Dict whose ``'task_acc'`` entry maps a task id to the list of
            accuracies recorded for it; a missing entry plots nothing in panel 1.
        y_true: True class labels.
        y_pred: Predicted class labels.
        num_tasks: Number of tasks.
        total_classes: Number of classes; classes are grouped into tasks in
            consecutive blocks of ``total_classes // num_tasks``.
            NOTE(review): classes beyond ``num_tasks * (total_classes // num_tasks)``
            are left out of the task matrix when the division is not exact.

    Returns:
        ``(num_tasks, num_tasks)`` array of percentages; rows are true tasks,
        columns are predicted tasks. Rows without samples are all zero.
    """
    sns.set_style("whitegrid")
    fig = plt.figure(figsize=(20, 12))

    # Panel 1: per-task accuracy over training phases.
    ax1 = fig.add_subplot(2, 2, 1)
    plotted_any = False
    for t_id, accs in history.get('task_acc', {}).items():
        # Task t_id is first evaluated in phase t_id. Plot only as many points
        # as were actually recorded so x and y always have the same length.
        n_points = min(len(accs), num_tasks - t_id)
        if n_points <= 0:
            continue
        phases = range(t_id, t_id + n_points)
        ax1.plot(phases, accs[:n_points], marker='o', linewidth=2, label=f'Task {t_id}')
        plotted_any = True

    ax1.set_title("Performance Evolution per Task", fontsize=14, fontweight='bold')
    ax1.set_xlabel("Training Phase (After learning Task X)", fontsize=12)
    ax1.set_ylabel("Accuracy (%)", fontsize=12)
    ax1.set_xticks(range(num_tasks))
    if plotted_any:
        ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Panel 2: task-level confusion matrix.
    ax2 = fig.add_subplot(2, 2, 2)
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(total_classes))
    classes_per_task = total_classes // num_tasks
    task_cm_norm = _task_confusion_percent(cm, num_tasks, classes_per_task)

    sns.heatmap(task_cm_norm, annot=True, fmt=".1f", cmap="OrRd", ax=ax2, vmin=0, vmax=100)
    ax2.set_title("Task confusion matrix (Row: True, Col: Pred)", fontsize=14, fontweight='bold')
    ax2.set_xlabel("Predicted task ID", fontsize=12)
    ax2.set_ylabel("True task ID", fontsize=12)

    # Panel 3: distribution of per-class accuracy (classes with samples only).
    ax3 = fig.add_subplot(2, 1, 2)
    per_class_acc = _per_class_accuracy(cm)
    if per_class_acc.size:
        mean_acc = per_class_acc.mean()
        sns.histplot(per_class_acc, bins=30, kde=True, ax=ax3, color='teal')
        ax3.axvline(mean_acc, color='r', linestyle='--', label=f'Mean Acc: {mean_acc:.2f}%')
        ax3.legend()

    ax3.set_title("Distribution of per-Class accuracy", fontsize=14, fontweight='bold')
    ax3.set_xlabel("Accuracy (%)", fontsize=12)
    ax3.set_ylabel("Number of Classes", fontsize=12)

    fig.tight_layout()
    plt.show()

    return task_cm_norm

# Gather predictions over all tasks, then draw the summary figure.
y_true, y_pred = get_all_predictions(
    model=model,
    dataset=cl_data,
    device=device,
    num_tasks=cfg['data']['num_tasks'],
)
# The returned matrix is assigned to `_` so the cell does not display it.
_ = plot_advanced_metrics(
    history=history,
    y_true=y_true,
    y_pred=y_pred,
    num_tasks=cfg['data']['num_tasks'],
    total_classes=cfg['data']['total_classes'],
)

NameError: name 'model' is not defined