In [1]:
import gc
import os
import pickle
import pprint
import traceback

import numpy as np
import tensorflow as tf

from model_architectures import (
    build_model_1, build_model_2, build_model_3, build_model_4, build_model_5,
    build_model_6, build_model_7, build_model_8, build_model_9, build_model_10,
    MODEL_REGISTRY
)
from model_training import cross_validate, precision_m, recall_m, f1_m
from fine_tuning import finetune_all_folds, compare_before_after_finetuning
from test_energy_measurement import measure_all_test_power

2025-10-05 12:26:09.630754: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-05 12:26:11.142727: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64:/usr/local/cuda-11.2/lib64:/usr/lib/x86_64-linux-gnu/gazebo-11/plugins:/opt/ros/humble/opt/rviz_ogre_vendor/lib:/opt/ros/humble/lib/x86_64-linux-gnu:/opt/ros/humble/lib:/usr/lib/x86_64-linux-gnu/gazebo-11/plugins::/usr/lib/x86_64-linux-gnu/gazebo-11/plugins:/usr/lib/x86_64-linux-gnu/gazebo-11/plugins::/usr/lib/x86_64-linux-gnu/gazebo-11/plugins:/usr/lib/x86_64-linux-gnu/g

GPU available: 1 device(s)


In [2]:
# Environment sanity check: report the TensorFlow version, the GPU devices
# TensorFlow can see, and whether this TensorFlow build has CUDA support.
print(f"TensorFlow version: {tf.__version__}")
print("GPU Available:", tf.config.list_physical_devices('GPU'))
print("Built with CUDA:", tf.test.is_built_with_cuda())

TensorFlow version: 2.11.1
GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Built with CUDA: True


## Train Model with Cross-Validation

This function trains a single model across all 10 folds using k-fold cross-validation. It:
- Wraps the model builder to include custom metrics (precision, recall, F1)
- Trains on each fold with early stopping (patience=5)
- Monitors GPU/CPU energy consumption during training
- Saves the best model for each fold based on validation loss
- Returns aggregated metrics across all folds

In [3]:
def train_model_with_metrics(model_builder, model_name, early_stopping_patience=5, monitor_resources=True):
    """Cross-validate one architecture and return the metrics from ``cross_validate``.

    The builder is wrapped so that every model is created with the accuracy,
    precision, recall and F1 metrics attached.

    Args:
        model_builder: Callable accepting ``input_shape`` and ``metrics``
            keyword arguments and returning the model to train.
        model_name: Name forwarded to ``cross_validate``.
        early_stopping_patience: Early-stopping patience forwarded to
            ``cross_validate`` (previously a hard-coded 5 was always used,
            so this argument had no effect).
        monitor_resources: Forwarded to ``cross_validate`` to switch resource
            monitoring on or off.

    Returns:
        Whatever ``cross_validate`` returns for this model.
    """
    def builder_with_metrics(input_shape):
        # cross_validate only supplies the input shape; add the metric list here.
        return model_builder(
            input_shape=input_shape,
            metrics=['accuracy', precision_m, recall_m, f1_m]
        )

    try:
        return cross_validate(
            model_builder=builder_with_metrics,
            model_name=model_name,
            folds_dir='new_Data_particions',
            num_folds=10,
            epochs=30,
            batch_size=128,
            save_dir='trained_models',
            monitor_resources=monitor_resources,
            early_stopping_patience=early_stopping_patience
        )
    finally:
        # Release Keras state and collect garbage even when training fails,
        # so a failed run does not leave memory held for the next model.
        tf.keras.backend.clear_session()
        gc.collect()

## Train Individual Model

Train a specific model architecture by changing the parameters:
- `build_model_X` - Replace X with model number (1-10) to select architecture
- `'model_X'` - Model name for saving (use same number as build function)
- Early stopping with patience of 5 epochs
- Resource monitoring enabled (tracks CPU/GPU energy consumption)
- Results saved to `trained_models/model_X_fold_Y_best.h5` (one file per fold)

In [4]:
# Run the full cross-validation for model 1 and keep the returned metrics
# in `metrics_1` for the cells further down.
metrics_1 = train_model_with_metrics(build_model_1, 'model_1', early_stopping_patience=5, monitor_resources=True)


################################################################################
STARTING 10-FOLD CROSS-VALIDATION FOR model_1
################################################################################


TRAINING FOLD 1 - model_1

GPU monitoring initialized: 1 device(s) found
Resource monitoring started
Loaded fold: Train=77912, Val=8253, Test=7521


2025-10-02 19:20:45.728773: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/30
Epoch 1: val_loss improved from inf to 2.73371, saving model to trained_models/model_1_fold_1_best.h5

NEW BEST METRICS at Epoch 1
Accuracy: 0.2007 | Precision: 0.5366 | Recall: 0.0183 | F1: 0.0351

Epoch 2/30
Epoch 2: val_loss improved from 2.73371 to 2.40942, saving model to trained_models/model_1_fold_1_best.h5

NEW BEST METRICS at Epoch 2
Accuracy: 0.3049 | Precision: 0.6656 | Recall: 0.0537 | F1: 0.0984

Epoch 3/30
Epoch 3: val_loss improved from 2.40942 to 2.29840, saving model to trained_models/model_1_fold_1_best.h5

NEW BEST METRICS at Epoch 3
Accuracy: 0.3337 | Precision: 0.7053 | Recall: 0.0746 | F1: 0.1338

Epoch 4/30
Epoch 4: val_loss improved from 2.29840 to 2.22864, saving model to trained_models/model_1_fold_1_best.h5

NEW BEST METRICS at Epoch 4
Accuracy: 0.3504 | Precision: 0.6931 | Recall: 0.1030 | F1: 0.1777

Epoch 5/30
Epoch 5: val_loss improved from 2.22864 to 2.20796, saving model to trained_models/model_1_fold_1_best.h5
Epoch 6/30
Epoch 6: val_loss im

In [None]:
# Summarise the resource usage recorded for model 1 across its folds.
# An empty stats list is skipped too, because the GPU check below indexes the
# first entry (same guard as compare_energy / calculate_costs).
if 'resource_stats' in metrics_1 and metrics_1['resource_stats']:
    print("\n" + "="*80)
    print("RESOURCE USAGE - MODEL 1")
    print("="*80)

    # Per-fold durations are stored in seconds; report minutes.
    total_time = sum(s['duration_seconds'] for s in metrics_1['resource_stats']) / 60
    print(f"\nTotal training time: {total_time:.1f} minutes")

    # GPU figures are only printed when the first fold recorded GPU power.
    if 'gpu_power_mean_w' in metrics_1['resource_stats'][0]:
        total_energy = sum(s['gpu_energy_wh'] for s in metrics_1['resource_stats'])
        avg_power = np.mean([s['gpu_power_mean_w'] for s in metrics_1['resource_stats']])
        print(f"Average GPU power: {avg_power:.1f} W")
        print(f"Total energy consumed: {total_energy:.3f} Wh")
    print("="*80 + "\n")

## Train Single Model (Memory-Constrained Approach)

Train one model at a time to avoid RAM exhaustion. Change `model_to_train` to train different architectures:
```python
model_to_train = ('model_X', build_model_X)  # Replace X with 1-10
```

To train multiple models automatically, replace the single tuple with a list and loop over it:
```python
models_to_train = [
    ('model_1', build_model_1),
    ('model_2', build_model_2),
    ('model_3', build_model_3)
]

for model_name, model_builder in models_to_train:
    # Training code here
```

In [4]:
# Architecture to train: (name used for the saved files, builder function).
model_to_train = ('model_1', build_model_1)

model_name, model_builder = model_to_train

print(f"\n{'#'*80}")
print(f"STARTING TRAINING: {model_name}")
print(f"{'#'*80}\n")

try:
    metrics = train_model_with_metrics(model_builder, model_name,
                                      early_stopping_patience=5,
                                      monitor_resources=True)

    # Save immediately so the finished run is on disk before anything else happens.
    os.makedirs('saved_metrics', exist_ok=True)
    metrics_path = f'saved_metrics/{model_name}_metrics.pkl'
    with open(metrics_path, 'wb') as f:
        pickle.dump(metrics, f)
    print(f"\nSaved {model_name} metrics to {metrics_path}")

except Exception as e:
    # Best effort: report the failure with its traceback instead of aborting
    # the notebook. `traceback` is imported in the notebook's import cell.
    print(f"Error training {model_name}: {e}")
    traceback.print_exc()
finally:
    # Always release Keras state and collect garbage, whether or not training succeeded.
    tf.keras.backend.clear_session()
    gc.collect()


################################################################################
STARTING TRAINING: model_1
################################################################################


################################################################################
STARTING 10-FOLD CROSS-VALIDATION FOR model_1
################################################################################


TRAINING FOLD 1 - model_1

GPU monitoring initialized: 1 device(s) found
Permission denied for /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj. Run with sudo or add permissions.
Resource monitoring started
Loaded fold: Train=77912, Val=8253, Test=7521
Epoch 1/30
Epoch 1: val_loss improved from inf to 2.77132, saving model to trained_models/model_1_fold_1_best.h5

NEW BEST METRICS at Epoch 1
Accuracy: 0.1876 | Precision: 0.3392 | Recall: 0.0054 | F1: 0.0106

Epoch 2/30
Epoch 2: val_loss improved from 2.77132 to 2.64797, saving model to trained_models/model_1_fold_1_best.h5

NEW BEST MET

In [10]:
# Reload the metrics that were saved for model 1.
# NOTE: pickle.load can execute code embedded in the file; only open pickles
# produced by this project.
with open('saved_metrics/model_1_metrics.pkl', 'rb') as f:
    metrics = pickle.load(f)

# Show the top-level keys first, then pretty-print the complete structure.
print("Keys in metrics:")
print(metrics.keys())

pprint.pprint(metrics)

Keys in metrics:
[{'model_name': 'model_1',
  'resource_stats': [{'cpu_freq_mean': 3987.050444915254,
                      'cpu_usage_max': 72.7,
                      'cpu_usage_mean': 33.76101694915254,
                      'duration_seconds': 64.58969044685364,
                      'gpu_energy_wh': 0.7884506694278888,
                      'gpu_memory_max_mb': 1661.4375,
                      'gpu_memory_mean_mb': 1589.6694915254238,
                      'gpu_power_max_w': 47.398,
                      'gpu_power_mean_w': 43.94544067796609,
                      'gpu_usage_max': 37.0,
                      'gpu_usage_mean': 28.89830508474576,
                      'ram_usage_max': 40.0,
                      'ram_usage_mean': 36.777966101694915},
                     {'cpu_freq_mean': 3977.981664351853,
                      'cpu_usage_max': 46.8,
                      'cpu_usage_mean': 28.44814814814814,
                      'duration_seconds': 58.64923596382141,
             

## Consolidate All Model Metrics

Load individual model metrics and save them into a single pickle file for easier analysis.

**What this does:**
- Searches for `model_1_metrics.pkl` through `model_10_metrics.pkl`
- Loads all found metric files
- Combines them into a single list
- Saves consolidated metrics to `saved_metrics/all_metrics.pkl`

In [6]:
def load_all_metrics(metrics_dir='saved_metrics', num_models=10):
    """Load the per-model metrics pickles found in ``metrics_dir``.

    Looks for ``model_1_metrics.pkl`` through ``model_<num_models>_metrics.pkl``
    and skips any file that does not exist.

    Args:
        metrics_dir: Directory containing the ``model_<i>_metrics.pkl`` files.
        num_models: Highest model number to look for (default 10, the
            previously hard-coded range).

    Returns:
        List of the unpickled metrics objects, in ascending model-number order.
        Each one must support ``['model_name']``, which is printed as progress.
    """
    all_metrics = []
    for i in range(1, num_models + 1):
        filepath = os.path.join(metrics_dir, f'model_{i}_metrics.pkl')
        if not os.path.exists(filepath):
            continue
        # NOTE: pickle.load can execute code embedded in the file; only load
        # pickles written by this project.
        with open(filepath, 'rb') as f:
            metrics = pickle.load(f)
        all_metrics.append(metrics)
        print(f"Loaded {metrics['model_name']}")
    return all_metrics

# Collect every per-model metrics file that exists on disk ...
all_metrics = load_all_metrics('saved_metrics')

# ... and write the combined list back out as a single pickle.
with open('saved_metrics/all_metrics.pkl', 'wb') as combined_file:
    pickle.dump(all_metrics, combined_file)

Loaded model_1
Loaded model_2
Loaded model_3
Loaded model_4
Loaded model_5
Loaded model_6
Loaded model_7
Loaded model_8
Loaded model_9
Loaded model_10


## Compare Model Performance

Display a summary table comparing test set performance across all trained models:
- **Test Accuracy** - Average accuracy across 10 folds
- **Test Precision** - Weighted precision score
- **Test Recall** - Weighted recall score
- **Test F1-Score** - Harmonic mean of precision and recall

Identifies the best performing model based on test accuracy.

In [7]:
def compare_models(all_metrics):
    """Print a test-set comparison table and name the most accurate model.

    Args:
        all_metrics: List of per-model metrics mappings. Each needs
            ``'model_name'`` plus ``'test_acc'``, ``'test_precision'``,
            ``'test_recall'`` and ``'test_f1'``; each of those four is averaged
            with ``np.mean`` before printing.

    Returns:
        None. The table is written to stdout. With no models, only the
        (empty) table and a notice are printed instead of raising.
    """
    print(f"\n{'='*100}")
    print("MODEL COMPARISON - TEST SET RESULTS")
    print(f"{'='*100}\n")

    print(f"{'Model':<15} {'Test Acc':<12} {'Test Prec':<12} {'Test Recall':<12} {'Test F1':<12}")
    print(f"{'-'*100}")

    for metrics in all_metrics:
        model_name = metrics['model_name']
        test_acc = np.mean(metrics['test_acc'])
        test_prec = np.mean(metrics['test_precision'])
        test_recall = np.mean(metrics['test_recall'])
        test_f1 = np.mean(metrics['test_f1'])

        print(f"{model_name:<15} {test_acc:>10.2f}%  {test_prec:>10.4f}  {test_recall:>10.4f}  {test_f1:>10.4f}")

    print(f"{'='*100}\n")

    # max() raises ValueError on an empty sequence; default=None lets the
    # no-models case end with a notice instead of a traceback.
    best_model = max(all_metrics, key=lambda x: np.mean(x['test_acc']), default=None)
    if best_model is None:
        print("No models to compare\n")
        return

    print(f"Best Model: {best_model['model_name']} with {np.mean(best_model['test_acc']):.2f}% test accuracy\n")

# Print the test-set comparison table for every entry in `all_metrics`.
compare_models(all_metrics)


MODEL COMPARISON - TEST SET RESULTS

Model           Test Acc     Test Prec    Test Recall  Test F1     
----------------------------------------------------------------------------------------------------
model_1              42.27%      0.6791      0.2054      0.3001
model_2              55.86%      0.7305      0.4193      0.5212
model_3              81.10%      0.8747      0.7731      0.8196
model_4              64.45%      0.7647      0.5648      0.6471
model_5              78.30%      0.8417      0.7478      0.7910
model_6              80.88%      0.8742      0.7675      0.8162
model_7              88.61%      0.9205      0.8690      0.8935
model_8              84.80%      0.8959      0.8222      0.8566
model_9              88.34%      0.9192      0.8630      0.8895
model_10             87.31%      0.9133      0.8505      0.8801

Best Model: model_7 with 88.61% test accuracy



## Compare Energy Consumption & Efficiency

Analyze energy consumption and computational efficiency for each model:
- **Total Time** - Combined training time across all folds (minutes)
- **Avg GPU Power** - Average GPU power draw during training (Watts)
- **Total Energy** - Cumulative energy consumption (Watt-hours)
- **Efficiency** - Test accuracy per Wh (higher = more efficient)

**Efficiency metric** shows which models achieve the best accuracy relative to energy consumed. Useful for identifying models suitable for resource-constrained or sustainable deployment.

**Note:** Only displays energy data if GPU monitoring was enabled during training.

In [8]:
def compare_energy(all_metrics):
    """Print training time, GPU power, energy and accuracy-per-Wh for each model.

    Each entry of ``all_metrics`` needs ``'model_name'`` and ``'test_acc'``.
    When ``'resource_stats'`` is missing or empty the row shows N/A; when the
    first fold has no ``'gpu_power_mean_w'`` only the time is shown.
    """
    rule = '=' * 100
    na_cell = f"{'N/A':<15}"

    print(f"\n{rule}")
    print("ENERGY CONSUMPTION & EFFICIENCY COMPARISON")
    print(f"{rule}\n")

    columns = (
        ('Model', 15), ('Total Time', 15), ('Avg GPU Power', 15),
        ('Total Energy', 15), ('Test Acc', 12), ('Efficiency', 15),
    )
    print(' '.join(f"{title:<{width}}" for title, width in columns))
    print('-' * 100)

    for entry in all_metrics:
        name = entry['model_name']
        mean_acc = np.mean(entry['test_acc'])
        fold_stats = entry['resource_stats'] if 'resource_stats' in entry else None

        # No monitoring data at all: accuracy only.
        if not fold_stats:
            print(f"{name:<15} {na_cell} {na_cell} {na_cell} {mean_acc:>10.2f}%  {na_cell}")
            continue

        # Fold durations are in seconds; the table shows minutes.
        minutes = sum(fold['duration_seconds'] for fold in fold_stats) / 60

        # GPU columns depend on the first fold having recorded GPU power.
        if 'gpu_power_mean_w' not in fold_stats[0]:
            print(f"{name:<15} {minutes:>13.1f}m  {'CPU only':<15} {na_cell} {mean_acc:>10.2f}%  {na_cell}")
            continue

        mean_power = np.mean([fold['gpu_power_mean_w'] for fold in fold_stats])
        energy_wh = sum(fold['gpu_energy_wh'] for fold in fold_stats)

        # Efficiency: accuracy per Wh (0 when no energy was recorded).
        if energy_wh > 0:
            per_wh = mean_acc / energy_wh
        else:
            per_wh = 0

        print(f"{name:<15} {minutes:>13.1f}m  {mean_power:>13.1f}W  {energy_wh:>13.3f}Wh  {mean_acc:>10.2f}%  {per_wh:>13.2f}%/Wh")

    print(f"{rule}\n")

# Print the energy/efficiency table for every entry in `all_metrics`.
compare_energy(all_metrics)


ENERGY CONSUMPTION & EFFICIENCY COMPARISON

Model           Total Time      Avg GPU Power   Total Energy    Test Acc     Efficiency     
----------------------------------------------------------------------------------------------------
model_1                   9.8m           45.1W          7.332Wh       42.27%           5.76%/Wh
model_2                  15.4m           45.7W         11.688Wh       55.86%           4.78%/Wh
model_3                  16.6m           47.7W         13.231Wh       81.10%           6.13%/Wh
model_4                  10.9m           61.4W         11.156Wh       64.45%           5.78%/Wh
model_5                  10.5m           47.3W          8.271Wh       78.30%           9.47%/Wh
model_6                  16.1m           47.9W         12.865Wh       80.88%           6.29%/Wh
model_7                  10.3m           83.3W         14.330Wh       88.61%           6.18%/Wh
model_8                  14.7m           95.2W         23.307Wh       84.80%           3.

## Calculate Electricity Costs

Estimate the electricity costs for training all models based on energy consumption.

**Default rate:** $0.12 per kWh (adjust `electricity_rate` parameter for your local rates)

**Output:**
- Cost per model
- Total cost for training all models

**Purpose:** Helps quantify the real-world cost of model development and compare the economic efficiency of different architectures.

**Note:** Only calculates costs if GPU energy data was collected during training.

In [9]:
def calculate_costs(all_metrics, electricity_rate=0.12):
    """Print the estimated electricity cost of training each model.

    Only models whose first ``'resource_stats'`` entry contains
    ``'gpu_power_mean_w'`` are costed; their ``'gpu_energy_wh'`` values are
    summed, converted to kWh and multiplied by ``electricity_rate``
    (price per kWh).
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("ESTIMATED ELECTRICITY COSTS")
    print(f"(Rate: ${electricity_rate:.2f} per kWh)")
    print(f"{rule}\n")

    running_total = 0
    for entry in all_metrics:
        fold_stats = entry['resource_stats'] if 'resource_stats' in entry else None
        # Skip models without monitoring data or without GPU readings.
        if not fold_stats or 'gpu_power_mean_w' not in fold_stats[0]:
            continue

        energy_kwh = sum(fold['gpu_energy_wh'] for fold in fold_stats) / 1000
        model_cost = energy_kwh * electricity_rate
        running_total += model_cost
        print(f"{entry['model_name']:<15}: ${model_cost:.4f} ({energy_kwh:.6f} kWh)")

    if running_total > 0:
        print(f"\n{'Total cost for all models:':<15} ${running_total:.4f}")
    else:
        print("No GPU energy data available")
    print(rule + "\n")

# Print the estimated training cost per model at the default rate (0.12 per kWh).
calculate_costs(all_metrics)


ESTIMATED ELECTRICITY COSTS
(Rate: $0.12 per kWh)

model_1        : $0.0009 (0.007332 kWh)
model_2        : $0.0014 (0.011688 kWh)
model_3        : $0.0016 (0.013231 kWh)
model_4        : $0.0013 (0.011156 kWh)
model_5        : $0.0010 (0.008271 kWh)
model_6        : $0.0015 (0.012865 kWh)
model_7        : $0.0017 (0.014330 kWh)
model_8        : $0.0028 (0.023307 kWh)
model_9        : $0.0027 (0.022135 kWh)
model_10       : $0.0065 (0.054284 kWh)

Total cost for all models: $0.0214



## Fine-tune Single Model

Fine-tune a specific pre-trained model on the fine-tuning dataset. Change `model_name='model_X'` to fine-tune different models (1-10).

**Parameters:**
- `epochs=10` - Fine-tuning epochs (fewer than initial training)
- `batch_size=128` - Batch size for fine-tuning
- `monitor_resources=True` - Track CPU/GPU energy consumption

**Output:** Fine-tuned models saved to `finetuned_models/model_X_fold_Y_finetuned.h5`

In [10]:
# Fine-tune model_1 on all 10 folds and keep the returned metrics.
# The fold directory and batch size match the cross-validation run above;
# `models_dir` is the directory that run saved its checkpoints to, and the
# fine-tuned models are written to `save_dir`.
finetuned_metrics_1 = finetune_all_folds(
    model_name='model_1',
    folds_dir='new_Data_particions',
    models_dir='trained_models',
    num_folds=10,
    epochs=10,
    batch_size=128,
    save_dir='finetuned_models',
    monitor_resources=True  
)


################################################################################
FINE-TUNING model_1 ACROSS 10 FOLDS
################################################################################


FINE-TUNING FOLD 1 - model_1

Loaded: Fine-tune=2116, Final Test=7521
Fine-tuning with 2116 samples for 10 epochs...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

FOLD 1 FINE-TUNING RESULTS
Final Test - Loss: 2.0095, Acc: 41.10%
Precision: 0.6484, Recall: 0.2054, F1: 0.3064

Saved fine-tuned model to: finetuned_models/model_1_fold_1_finetuned.h5

FINE-TUNING FOLD 2 - model_1

Loaded: Fine-tune=2201, Final Test=7524
Fine-tuning with 2201 samples for 10 epochs...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

FOLD 2 FINE-TUNING RESULTS
Final Test - Loss: 1.8598, Acc: 46.25%
Precision: 0.6645, Recall: 0.2921, F1: 0.4016

Saved fine-tuned model to: finetuned_model

In [None]:
# Compare model 1's cross-validation metrics with its fine-tuned metrics.
# Needs `metrics_1` and `finetuned_metrics_1` from the earlier cells.
compare_before_after_finetuning(metrics_1, finetuned_metrics_1)

## Fine-tune All Trained Models

Automatically fine-tune all previously trained models and compare performance before/after fine-tuning.

**Process:**
1. Loads each model from `all_metrics`
2. Fine-tunes on the fine-tuning dataset (10 epochs)
3. Evaluates on final test set
4. Displays before/after comparison for each model
5. Saves fine-tuned models and metrics

**Memory Management:** Uses `clear_session()` and garbage collection after each model to prevent RAM overflow.

**Output:** 
- Fine-tuned models saved to `finetuned_models/`
- Consolidated metrics in `all_finetuned_metrics`

**Note:** Fine-tuning typically takes 10-20% of the time required for initial training.

In [10]:
# Fine-tune every model listed in `all_metrics`, collecting the results of the
# models that succeed. A failed model is reported and skipped, so this list can
# end up shorter than `all_metrics`.
all_finetuned_metrics = []

for metrics in all_metrics:
    model_name = metrics['model_name']
    print(f"\nFine-tuning {model_name}...")

    try:
        finetuned = finetune_all_folds(
            model_name=model_name,
            num_folds=10,
            epochs=10,
            monitor_resources=True
        )
        all_finetuned_metrics.append(finetuned)

        compare_before_after_finetuning(metrics, finetuned)
    except Exception as e:
        # Best effort: keep going with the remaining models, but show the full
        # traceback (as the training cell does) so the failure can be diagnosed.
        # `traceback` is imported in the notebook's import cell.
        print(f"Error fine-tuning {model_name}: {e}")
        traceback.print_exc()
    finally:
        # Release Keras state and collect garbage between models to limit RAM use.
        tf.keras.backend.clear_session()
        gc.collect()


Fine-tuning model_1...

################################################################################
FINE-TUNING model_1 ACROSS 10 FOLDS
################################################################################


FINE-TUNING FOLD 1 - model_1

GPU monitoring initialized: 1 device(s) found
Resource monitoring started
Loaded: Fine-tune=2116, Final Test=7521
Fine-tuning with 2116 samples for 10 epochs...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Resource monitoring stopped

RESOURCE USAGE STATISTICS
Duration: 2.2 seconds (0.0 minutes)

CPU:
  Usage (mean): 25.7%
  Usage (max):  35.4%
  Frequency:    4006 MHz

RAM:
  Usage (mean): 39.3%
  Usage (max):  39.7%

GPU:
  Usage (mean):  13.0%
  Usage (max):   22.0%
  Memory (mean): 3096 MB
  Memory (max):  3096 MB
  Power (mean):  40.4 W
  Power (max):   41.0 W
  Energy used:   0.025 Wh


FOLD 1 FINE-TUNING RESULTS
Final Test - Loss: 2.5043, Acc: 25.34%
Precision: 0.

## Fine-tuning Energy & Cost Analysis

Compare energy consumption and costs between initial training and fine-tuning:

**Metrics displayed:**
- **FT Time** - Fine-tuning duration across all folds
- **FT Energy** - Energy consumed during fine-tuning only
- **Final Acc** - Test accuracy after fine-tuning
- **Total Energy** - Combined energy (training + fine-tuning)

**Cost Analysis:**
- Individual fine-tuning cost per model
- Total fine-tuning cost for all models
- Based on electricity rate ($0.12/kWh by default)

**Use Case:** Evaluate whether fine-tuning provides sufficient accuracy improvement to justify the additional energy cost and training time.

In [11]:
def _gpu_fold_stats(model_metrics):
    """Return the per-fold resource stats if GPU power was recorded, else []."""
    fold_stats = model_metrics.get('resource_stats') or []
    # An empty list must not be indexed; the first fold decides whether GPU data exists.
    if fold_stats and 'gpu_power_mean_w' in fold_stats[0]:
        return fold_stats
    return []


# Report fine-tuning time/energy per model, the combined training + fine-tuning
# energy, and the estimated fine-tuning electricity cost.
if all_finetuned_metrics:
    print("\n" + "="*100)
    print("FINE-TUNING ENERGY COMPARISON")
    print("="*100 + "\n")

    print(f"{'Model':<15} {'FT Time':<12} {'FT Energy':<15} {'Final Acc':<12} {'Total Energy':<15}")
    print("-"*100)

    # Pair results by model name, not by position: a model whose fine-tuning
    # failed is absent from all_finetuned_metrics, which would shift every later
    # row of a positional zip onto the wrong original model.
    original_by_name = {m['model_name']: m for m in all_metrics}

    for ft_metrics in all_finetuned_metrics:
        model_name = ft_metrics['model_name']
        final_acc = np.mean(ft_metrics['test_acc'])

        ft_stats = _gpu_fold_stats(ft_metrics)
        if not ft_stats:
            continue

        ft_time = sum(s['duration_seconds'] for s in ft_stats) / 60
        ft_energy = sum(s['gpu_energy_wh'] for s in ft_stats)

        # Original training energy; 0 when the model is unknown or has no GPU data.
        orig_stats = _gpu_fold_stats(original_by_name.get(model_name, {}))
        orig_energy = sum(s['gpu_energy_wh'] for s in orig_stats)

        total_energy = orig_energy + ft_energy

        print(f"{model_name:<15} {ft_time:>10.1f}m  {ft_energy:>13.3f}Wh  {final_acc:>10.2f}%  {total_energy:>13.3f}Wh")

    print("="*100 + "\n")

    electricity_rate = 0.12
    print(f"\nESTIMATED FINE-TUNING COSTS (Rate: ${electricity_rate:.2f}/kWh):")
    total_ft_cost = 0

    for ft_metrics in all_finetuned_metrics:
        ft_stats = _gpu_fold_stats(ft_metrics)
        if not ft_stats:
            continue

        # Energy is stored in Wh; the rate is per kWh.
        ft_energy_kwh = sum(s['gpu_energy_wh'] for s in ft_stats) / 1000
        cost = ft_energy_kwh * electricity_rate
        total_ft_cost += cost
        print(f"  {ft_metrics['model_name']:<15}: ${cost:.4f}")

    if total_ft_cost > 0:
        print(f"\n  Total fine-tuning cost: ${total_ft_cost:.4f}")


FINE-TUNING ENERGY COMPARISON

Model           FT Time      FT Energy       Final Acc    Total Energy   
----------------------------------------------------------------------------------------------------
model_1                0.2m          0.154Wh       37.15%         12.867Wh
model_2                0.2m          0.163Wh       56.05%         11.851Wh
model_3                0.4m          0.342Wh       82.08%         13.573Wh
model_4                0.2m          0.223Wh       65.74%         11.379Wh
model_5                0.4m          0.333Wh       79.18%          8.604Wh
model_6                0.4m          0.305Wh       81.78%         13.169Wh
model_7                0.4m          0.330Wh       90.03%         14.661Wh
model_8                0.4m          0.529Wh       86.00%         23.835Wh
model_9                0.6m          0.687Wh       89.94%         22.822Wh
model_10               1.1m          1.947Wh       89.25%         56.231Wh


ESTIMATED FINE-TUNING COSTS (Rate: $0.12/

## Save Fine-tuned Model Metrics

Save all fine-tuned model metrics to a consolidated pickle file for later analysis and visualization.

**Output:** `saved_finetuned_metrics/all_finetuned_metrics.pkl`

**Purpose:** Preserves fine-tuning results for:
- Comparison with original trained models
- Energy consumption analysis
- Performance visualization
- Future reference without re-running fine-tuning

**Note:** Only saves if fine-tuning has been completed (checks if `all_finetuned_metrics` exists in memory).

In [29]:
# Make sure the output directory exists (no-op when it is already there).
os.makedirs('saved_finetuned_metrics', exist_ok=True)
# Only save when `all_finetuned_metrics` exists in this kernel session; at the
# top level of a cell, locals() is the kernel's global namespace.
if 'all_finetuned_metrics' in locals():
    with open('saved_finetuned_metrics/all_finetuned_metrics.pkl', 'wb') as f:
        pickle.dump(all_finetuned_metrics, f)
    print("Saved all_finetuned_metrics to all_finetuned_metrics.pkl")

Saved all_finetuned_metrics to all_finetuned_metrics.pkl


## Compare All Models

Compare all models before and after fine-tuning.

- Normal models: individual pickle files (model_1_metrics.pkl, model_2_metrics.pkl, ...)
- Fine-tuned models: single pickle file with list of all models

In [27]:
def compare_before_after_all_models(normal_metrics_dir='saved_metrics',
                                     finetuned_file='saved_finetuned_metrics/all_finetuned_metrics.pkl'):
    """
    Print a before/after fine-tuning comparison for every model present in both metric sets.

    Normal models: individual pickle files (model_1_metrics.pkl, model_2_metrics.pkl, ...)
    Fine-tuned models: single pickle file with list of all models

    Args:
        normal_metrics_dir: Directory containing the per-model ``model_{i}_metrics.pkl``
            files for i = 1..10. Files that do not exist are skipped.
        finetuned_file: Path to the pickle file holding the fine-tuned metric entries.

    Returns:
        None. The report is written to stdout. The function returns early, after
        printing a message, if ``finetuned_file`` does not exist or if no
        ``model_name`` appears in both metric sets.

    Note:
        Both inputs are read with ``pickle.load``, which can execute arbitrary code
        during deserialization. Only use this on files produced by this project.
    """
    double_rule = '=' * 120
    dash_rule = '-' * 120

    # Report rows: display label -> (key in the metrics dict, unit suffix).
    # Defined once up front so the per-model tables and the overall summary share
    # it, rather than the summary relying on a name leaked from the loop body.
    metrics_comparison = {
        'Test Accuracy (%)': ('test_acc', '%'),
        'Test Precision': ('test_precision', ''),
        'Test Recall': ('test_recall', ''),
        'Test F1 Score': ('test_f1', ''),
    }

    def _percent_change(baseline, delta):
        """Relative change in percent; 0 when the baseline is 0 to avoid dividing by zero."""
        return (delta / baseline) * 100 if baseline != 0 else 0

    def _print_metric_row(label, unit, before, after):
        """Print one 'before / after / difference / % change' row for a quality metric."""
        delta = after - before
        pct = _percent_change(before, delta)
        if unit == '%':
            print(f"{label:<20} {before:>13.2f}%  {after:>13.2f}%  {delta:>+13.2f}%  {pct:>+13.2f}%")
        else:
            print(f"{label:<20} {before:>13.4f}  {after:>13.4f}  {delta:>+13.4f}  {pct:>+13.2f}%")

    def _all_have(stats, key):
        """True when every per-fold stats entry contains ``key``."""
        return all(key in entry for entry in stats)

    def _print_resource_section(normal_stats, finetuned_stats):
        """Print the energy/time table, omitting whatever the recorded stats cannot support."""
        if not (normal_stats and finetuned_stats):
            return
        # Every fold must carry the energy reading; otherwise the sums below
        # would fail part-way through the report.
        if not (_all_have(normal_stats, 'gpu_energy_wh') and _all_have(finetuned_stats, 'gpu_energy_wh')):
            return

        print(f"\n{'Energy & Time':<20} {'Normal':<15} {'Fine-tuned':<15} {'Difference':<15} {'% Change':<15}")
        print(dash_rule)

        normal_energy = sum(s['gpu_energy_wh'] for s in normal_stats)
        finetuned_energy = sum(s['gpu_energy_wh'] for s in finetuned_stats)
        energy_diff = finetuned_energy - normal_energy
        energy_percent = _percent_change(normal_energy, energy_diff)
        print(f"{'GPU Energy (Wh)':<20} {normal_energy:>13.3f}  {finetuned_energy:>13.3f}  {energy_diff:>+13.3f}  {energy_percent:>+13.2f}%")

        # Durations are optional: skip only this row when any fold lacks them.
        if _all_have(normal_stats, 'duration_seconds') and _all_have(finetuned_stats, 'duration_seconds'):
            normal_time = sum(s['duration_seconds'] for s in normal_stats) / 60
            finetuned_time = sum(s['duration_seconds'] for s in finetuned_stats) / 60
            time_diff = finetuned_time - normal_time
            time_percent = _percent_change(normal_time, time_diff)
            print(f"{'Training Time (min)':<20} {normal_time:>13.1f}  {finetuned_time:>13.1f}  {time_diff:>+13.1f}  {time_percent:>+13.2f}%")

        total_energy = normal_energy + finetuned_energy
        print(f"{'Total Energy (Wh)':<20} {'-':<15} {'-':<15} {total_energy:>13.3f}")

    print(f"\n{double_rule}")
    print("COMPREHENSIVE COMPARISON: NORMAL vs FINE-TUNED MODELS")
    print(f"{double_rule}\n")

    # Load normal models (one pickle per model; missing files are skipped).
    normal_models = []
    for i in range(1, 11):
        filepath = os.path.join(normal_metrics_dir, f'model_{i}_metrics.pkl')
        if os.path.exists(filepath):
            with open(filepath, 'rb') as f:
                normal_models.append(pickle.load(f))

    # Load fine-tuned models (single pickle for all models).
    if not os.path.exists(finetuned_file):
        print(f"Fine-tuned metrics file not found: {finetuned_file}")
        return

    with open(finetuned_file, 'rb') as f:
        finetuned_models = pickle.load(f)

    # Pair each normal model with the first fine-tuned entry of the same name.
    matched_pairs = []
    for normal in normal_models:
        normal_name = normal['model_name']
        finetuned = next((ft for ft in finetuned_models if ft['model_name'] == normal_name), None)
        if finetuned:
            matched_pairs.append((normal, finetuned))

    if not matched_pairs:
        print("No matching model pairs found!")
        return

    print(f"Found {len(matched_pairs)} model pairs to compare\n")

    # Detailed comparison for each model.
    for normal, finetuned in matched_pairs:
        model_name = normal['model_name']

        print(f"\n{'#'*120}")
        print(f"MODEL: {model_name}")
        print(f"{'#'*120}\n")

        print(f"{'Metric':<20} {'Normal':<15} {'Fine-tuned':<15} {'Difference':<15} {'% Change':<15}")
        print(dash_rule)

        for metric_name, (key, unit) in metrics_comparison.items():
            _print_metric_row(metric_name, unit, np.mean(normal[key]), np.mean(finetuned[key]))

        # Energy comparison if both sides recorded resource stats.
        if 'resource_stats' in normal and 'resource_stats' in finetuned:
            _print_resource_section(normal['resource_stats'], finetuned['resource_stats'])

    # Overall summary: average of the per-model means across all matched pairs.
    print(f"\n{double_rule}")
    print("OVERALL SUMMARY - AVERAGE ACROSS ALL MODELS")
    print(f"{double_rule}\n")

    print(f"{'Metric':<20} {'Avg Normal':<15} {'Avg Fine-tuned':<15} {'Avg Difference':<15} {'Avg % Change':<15}")
    print(dash_rule)

    for metric_name, (key, unit) in metrics_comparison.items():
        avg_normal = np.mean([np.mean(pair[0][key]) for pair in matched_pairs])
        avg_finetuned = np.mean([np.mean(pair[1][key]) for pair in matched_pairs])
        _print_metric_row(metric_name, unit, avg_normal, avg_finetuned)

    print(f"{double_rule}\n")


In [28]:
# Print the normal vs fine-tuned report from the metrics saved on disk.
compare_before_after_all_models(
    normal_metrics_dir='saved_metrics',
    finetuned_file='saved_finetuned_metrics/all_finetuned_metrics.pkl',
)


COMPREHENSIVE COMPARISON: NORMAL vs FINE-TUNED MODELS

Found 10 model pairs to compare


########################################################################################################################
MODEL: model_1
########################################################################################################################

Metric               Normal          Fine-tuned      Difference      % Change       
------------------------------------------------------------------------------------------------------------------------
Test Accuracy (%)            36.85%          37.15%          +0.29%          +0.79%
Test Precision              0.5362         0.5312        -0.0050          -0.93%
Test Recall                 0.1586         0.1679        +0.0093          +5.84%
Test F1 Score               0.2353         0.2453        +0.0100          +4.23%

Energy & Time        Normal          Fine-tuned      Difference      % Change       
---------------------------------

## Measure Test Energy Consumption

Measure CPU and GPU energy consumption during model inference on the test set.

**What this does:**
- Loads trained models (.h5 files)
- Runs inference on the final test set
- Monitors CPU energy (via RAPL) and GPU energy (via NVML)
- Records power consumption, duration, and accuracy
- Saves detailed results to CSV

**Requirements:**
- RAPL permissions: `sudo chmod -R a+r /sys/class/powercap/intel-rapl/`
- GPU monitoring: `pynvml` library installed

**Output CSV columns:**
- `model`, `fold` - Model identification
- `test_acc` - Test accuracy
- `cpu_energy_wh`, `gpu_energy_wh` - Energy consumed by each device
- `cpu_power_mean_w`, `gpu_power_mean_w` - Average power draw
- `duration_s` - Inference time

**Purpose:** Separate test energy from training energy for fair comparison between deployment scenarios (Keras vs TFLite, CPU vs GPU).

**Note:** This measures inference energy only, not training energy. Run after all models are trained.

In [16]:
# Test-set power measurement for the normal (not fine-tuned) models; results go to the CSV below.
df_normal = measure_all_test_power(models_dir='trained_models',
                                   output_file='saved_metrics/normal_models_test_power.csv')


MEASURING TEST POWER FOR ALL NORMAL MODELS


model_1:
  Fold 1... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring stopped
CPU: 63.7W/0.003693Wh | GPU: 39.9W/0.002314Wh
  Fold 2... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring stopped
CPU: 69.7W/0.003972Wh | GPU: 40.1W/0.002287Wh
  Fold 3... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring stopped
CPU: 39.8W/0.004601Wh | GPU: 40.0W/0.004620Wh
  Fold 4... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring sto

In [14]:
# Same test-set power measurement, this time for the fine-tuned models.
df_finetuned = measure_all_test_power(models_dir='finetuned_models',
                                      output_file='saved_finetuned_metrics/finetuned_models_test_power.csv',
                                      is_finetuned=True)


MEASURING TEST POWER FOR ALL FINE-TUNED MODELS


model_1:
  Fold 1... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring stopped
CPU: 72.8W/0.004157Wh | GPU: 53.0W/0.003029Wh
  Fold 2... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring stopped
CPU: 39.0W/0.004445Wh | GPU: 52.6W/0.005990Wh
  Fold 3... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring stopped
CPU: 72.4W/0.004116Wh | GPU: 47.7W/0.002714Wh
  Fold 4... GPU monitoring initialized: 1 device(s) found
CPU energy monitoring (RAPL) initialized: 1 domain(s) found
  - package-0 (intel-rapl:0)
Resource monitoring started
Resource monitoring