**ANALYSIS NOTEBOOK**

In [1]:
# Evaluation and visualization for analysis of performance
import json
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch

In [2]:
# Plotting the logs of the history files (compatible with .json and .pkl)
def _find_accuracy_column(columns, split):
    """Return the first column whose lower-cased name contains both `split` and 'acc', else None."""
    for col in columns:
        lowered = str(col).lower()
        if split in lowered and 'acc' in lowered:
            return col
    return None


def plot_training_history(history_path, output_path):
    """Plot train/validation accuracy from a saved history file and save the figure.

    Parameters
    ----------
    history_path : str
        Path to a ``.json`` or ``.pkl`` history file (extension is matched
        case-insensitively). The loaded payload is passed straight to
        ``pd.DataFrame``, so it must be something that constructor accepts
        (e.g. a dict of equal-length lists or a list of per-epoch dicts).
    output_path : str
        Destination image path. Its parent directory is created if missing.

    Returns
    -------
    bool
        ``True`` when a figure was written; ``False`` when no train/val
        accuracy columns could be found (a warning is printed, nothing is saved).

    Raises
    ------
    ValueError
        If the file extension is neither ``.json`` nor ``.pkl``.
    """
    # Lower-case the extension so 'history.JSON' / 'history.PKL' are accepted too.
    ext = os.path.splitext(history_path)[1].lower()

    if ext == '.json':
        with open(history_path, 'r') as f:
            history = json.load(f)
    elif ext == '.pkl':
        # SECURITY: pickle.load can execute arbitrary code embedded in the file.
        # Only load history files produced by your own training runs.
        with open(history_path, 'rb') as f:
            history = pickle.load(f)
    else:
        raise ValueError("Unsupported file format. Use .json or .pkl")

    # pd.DataFrame handles both a dict of lists and a list of dicts.
    df = pd.DataFrame(history)

    # Locate the accuracy columns by substring so different naming conventions work.
    train_col = _find_accuracy_column(df.columns, 'train')
    val_col = _find_accuracy_column(df.columns, 'val')

    if train_col is None or val_col is None:
        print(f"Warning: Could not find train/val accuracy columns. Available: {list(df.columns)}")
        return False

    # Use the recorded epoch numbers when available, otherwise the row index.
    x_values = df['epoch'] if 'epoch' in df.columns else df.index

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(x_values, df[train_col], label='Train Accuracy', marker='o')
        ax.plot(x_values, df[val_col], label='Validation Accuracy', marker='s')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Accuracy')
        ax.set_title(f'Training and Validation Accuracy - {os.path.basename(history_path)}')
        ax.legend()
        ax.grid(True)

        # savefig does not create missing directories, so make sure the target exists.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        fig.savefig(output_path, dpi=100, bbox_inches='tight')
    finally:
        # Always release the figure, even if saving fails, so repeated calls do not leak figures.
        plt.close(fig)
    return True

In [3]:
# History file paths (may be absent if not generated)
# Project root defaults to the original author's checkout but can be overridden by
# setting the LIGHTVISION_ROOT environment variable, so the notebook can run elsewhere.
PROJECT_ROOT = os.environ.get(
    'LIGHTVISION_ROOT',
    '/Users/academic-city-university/Naps/DeepExam/Deep-Learning_EuroSAT_Classifier/lightvision',
)
LOGS_DIR = os.path.join(PROJECT_ROOT, 'outputs', 'logs')

# One history file per training stage.
distilled_model_history_path = os.path.join(LOGS_DIR, 'distillation_history.json')
pruned_model_history_path = os.path.join(LOGS_DIR, 'pruned_history.json')
qat_model_history_path = os.path.join(LOGS_DIR, 'qat_history.json')
resnet18_model_history_path = os.path.join(LOGS_DIR, 'student_resnet18_history.pkl')
teacher_model_history_path = os.path.join(LOGS_DIR, 'teacher_training_history.json')

# Directory that receives every figure written by this notebook.
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'outputs', 'plots')

In [4]:
# Calling the plotting function for each model
# (history file, destination image) pairs, one per model variant.
history_files = [
    (distilled_model_history_path, os.path.join(OUTPUT_DIR, 'distilled_model_history.png')),
    (pruned_model_history_path, os.path.join(OUTPUT_DIR, 'pruned_model_history.png')),
    (qat_model_history_path, os.path.join(OUTPUT_DIR, 'qat_model_history.png')),
    (resnet18_model_history_path, os.path.join(OUTPUT_DIR, 'resnet18_student_model_history.png')),
    (teacher_model_history_path, os.path.join(OUTPUT_DIR, 'teacher_model_history.png')),
]

# Create the plot directory up front: saving a figure into a missing directory fails,
# and this is the first cell that writes into OUTPUT_DIR.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for history_path, output_path in history_files:
    history_name = os.path.basename(history_path)

    if not os.path.exists(history_path):
        print(f"⚠ Skipping {history_name} - file not found")
        continue

    # Best-effort: one broken history file should not stop the remaining plots.
    try:
        plotted = plot_training_history(history_path, output_path)
    except Exception as e:
        print(f"✗ Error plotting {history_name}: {e}")
        continue

    # Only an explicit False return is treated as "nothing was plotted";
    # any other return value (including None) counts as success.
    if plotted is False:
        print(f"⚠ No plot written for {history_name}")
    else:
        print(f"✓ Plotted {history_name} → {os.path.basename(output_path)}")

✓ Plotted distillation_history.json → distilled_model_history.png
✓ Plotted pruned_history.json → pruned_model_history.png
✓ Plotted qat_history.json → qat_model_history.png
✓ Plotted student_resnet18_history.pkl → resnet18_student_model_history.png
✓ Plotted teacher_training_history.json → teacher_model_history.png


## Model comparison: accuracy, size, latency, and FLOPs

In [5]:
# Load evaluation results (produced by scripts/evaluation/05_evaluate_model.py)
# Location of the evaluation report. The project root defaults to the original author's
# checkout and can be overridden with the LIGHTVISION_ROOT environment variable.
eval_path = os.path.join(
    os.environ.get(
        'LIGHTVISION_ROOT',
        '/Users/academic-city-university/Naps/DeepExam/Deep-Learning_EuroSAT_Classifier/lightvision',
    ),
    'outputs', 'reports', 'evaluation_results.json',
)

if not os.path.exists(eval_path):
    print('Evaluation results not found at', eval_path)
    # Empty frame lets the downstream cells skip themselves via `df.empty`.
    df = pd.DataFrame()
else:
    with open(eval_path, 'r') as f:
        results = json.load(f)

    # Flatten {model_name: {metric: value}} into one row per model;
    # metrics missing from the report become None.
    metric_columns = ['checkpoint', 'accuracy', 'loss', 'latency_ms', 'size_mb']
    rows = [
        {'name': name, **{col: info.get(col) for col in metric_columns}}
        for name, info in results.items()
    ]
    # Passing `columns` explicitly keeps 'name' present even when `results` is empty,
    # so set_index does not raise KeyError on an empty report.
    df = pd.DataFrame(rows, columns=['name', *metric_columns]).set_index('name')
    display(df)


Unnamed: 0_level_0,checkpoint,accuracy,loss,latency_ms,size_mb
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
teacher_resnet50,outputs/models/teacher_resnet50.pth,0.983457,0.053061,7.378359,90.056
student_resnet18,outputs/models/student_resnet18.pth,0.974321,0.090249,2.516418,42.73
pruned_model,outputs/models/pruned_model.pth,0.975309,0.075106,2.513332,42.729
distilled_model,outputs/models/distilled_model.pth,0.984938,0.048888,2.546458,42.729
qat_model,outputs/models/quantized_model.pth,0.98321,0.062404,2.533035,42.729


In [6]:
# Add approximate FLOPs based on architecture heuristics
def guess_flops(name):
    """Return an approximate single-image FLOPs (multiply-add) figure guessed from a model name.

    The name is lower-cased and checked against keyword groups in order; the
    first group with any keyword contained in the name determines the result.
    Returns ``np.nan`` when no keyword matches.
    """
    lowered = name.lower()

    # Ordered (keywords, estimate) pairs: earlier groups take precedence.
    keyword_estimates = (
        (('resnet50', 'teacher'), 4.1e9),
        (('resnet18', 'student', 'distill', 'qat', 'pruned'), 1.8e9),
        (('mobilenet', 'mobile'), 0.15e9),
    )

    for keywords, estimate in keyword_estimates:
        if any(keyword in lowered for keyword in keywords):
            return estimate
    return np.nan

# Both axes must be non-empty, i.e. evaluation results were actually loaded.
if all(df.shape):
    # Estimated FLOPs per model, looked up from the model name in the index.
    df['flops'] = list(map(guess_flops, df.index))
    # Efficiency ratios: accuracy per megabyte and per GFLOP.
    df['accuracy_per_mb'] = df['accuracy'].div(df['size_mb'])
    df['accuracy_per_gflop'] = df['accuracy'].div(df['flops'].div(1e9))
    # Show the models ranked from most to least accurate.
    display(df.sort_values(by='accuracy', ascending=False))


Unnamed: 0_level_0,checkpoint,accuracy,loss,latency_ms,size_mb,flops,accuracy_per_mb,accuracy_per_gflop
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
distilled_model,outputs/models/distilled_model.pth,0.984938,0.048888,2.546458,42.729,1800000000.0,0.023051,0.547188
teacher_resnet50,outputs/models/teacher_resnet50.pth,0.983457,0.053061,7.378359,90.056,4100000000.0,0.010921,0.239868
qat_model,outputs/models/quantized_model.pth,0.98321,0.062404,2.533035,42.729,1800000000.0,0.02301,0.546228
pruned_model,outputs/models/pruned_model.pth,0.975309,0.075106,2.513332,42.729,1800000000.0,0.022825,0.541838
student_resnet18,outputs/models/student_resnet18.pth,0.974321,0.090249,2.516418,42.73,1800000000.0,0.022802,0.541289


In [8]:
# Plots: Accuracy vs Size, Accuracy vs FLOPs, Latency comparison
def _save_accuracy_scatter(data, x, xlabel, title, filename, log_x=False):
    """Save an accuracy-vs-`x` scatter plot (one point per model) into OUTPUT_DIR and return its path."""
    fig, ax = plt.subplots(figsize=(6, 5))
    out_path = os.path.join(OUTPUT_DIR, filename)
    try:
        sns.scatterplot(data=data, x=x, y='accuracy', hue='name', s=100, ax=ax)
        ax.set_xlabel(xlabel)
        if log_x:
            ax.set_xscale('log')
        ax.set_ylabel('Accuracy')
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(out_path, dpi=120)
    finally:
        # Release the figure even if saving fails.
        plt.close(fig)
    print('Saved', out_path)
    return out_path


if not df.empty:
    # ensure output dir exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # bring the index (model name) into a column for plotting
    reset_df = df.reset_index()  # index column is 'name'

    out1 = _save_accuracy_scatter(
        reset_df, 'size_mb', 'Model size (MB)',
        'Accuracy vs Model Size', 'comparison_accuracy_size.png',
    )
    out2 = _save_accuracy_scatter(
        reset_df, 'flops', 'FLOPs',
        'Accuracy vs FLOPs', 'comparison_accuracy_flops.png', log_x=True,
    )

    # Latency comparison as a bar chart, one bar per model.
    out3 = os.path.join(OUTPUT_DIR, 'comparison_latency.png')
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        reset_df.set_index('name')['latency_ms'].plot(kind='bar', ax=ax)
        ax.set_ylabel('Latency (ms)')
        ax.set_title('Inference Latency')
        fig.tight_layout()
        fig.savefig(out3, dpi=120)
    finally:
        plt.close(fig)
    print('Saved', out3)


Saved /Users/academic-city-university/Naps/DeepExam/Deep-Learning_EuroSAT_Classifier/lightvision/outputs/plots/comparison_accuracy_size.png
Saved /Users/academic-city-university/Naps/DeepExam/Deep-Learning_EuroSAT_Classifier/lightvision/outputs/plots/comparison_accuracy_flops.png
Saved /Users/academic-city-university/Naps/DeepExam/Deep-Learning_EuroSAT_Classifier/lightvision/outputs/plots/comparison_latency.png


## Select best student architecture for compression experiments

In [10]:
# Choose best student variant by accuracy/size trade-off
if not df.empty:
    # Model names containing any of these substrings count as student variants.
    # Matching is case-insensitive so e.g. 'Student_ResNet18' is recognised too.
    student_keywords = ('student', 'resnet18', 'distill', 'qat', 'pruned')
    student_names = [
        idx for idx in df.index
        if any(keyword in str(idx).lower() for keyword in student_keywords)
    ]
    # A boolean mask selects each matching row exactly once (label lists would repeat
    # rows when the index contains duplicate names); .copy() keeps `df` untouched.
    students = df.loc[df.index.isin(student_names)].copy()

    if students.empty:
        print('No student variants found in evaluation results.')
    else:
        # Ensure the metric exists
        if 'accuracy_per_mb' not in students.columns:
            students = students.assign(
                accuracy_per_mb=students['accuracy'] / students['size_mb']
            )
        # Ignore variants with a missing score so idxmax never runs on all-NaN data.
        scores = students['accuracy_per_mb'].dropna()
        if scores.empty:
            print('No student variant has a valid accuracy/MB score.')
        else:
            best = scores.idxmax()
            print('Best student variant by accuracy/MB:', best)
            display(students.loc[[best]])
else:
    print('No evaluation results to select from.')


Best student variant by accuracy/MB: distilled_model


Unnamed: 0_level_0,checkpoint,accuracy,loss,latency_ms,size_mb,flops,accuracy_per_mb,accuracy_per_gflop
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
distilled_model,outputs/models/distilled_model.pth,0.984938,0.048888,2.546458,42.729,1800000000.0,0.023051,0.547188
