In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from datetime import datetime

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)

if 'google.colab' in sys.modules:
    src_path = '/content/Multimodal-Vehicle-Damage-Assessor/src'
    os.chdir('/content/Multimodal-Vehicle-Damage-Assessor')
    sys.path.insert(0, src_path)
else:
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))

import config
from data_loader import create_tf_dataset, plot_sample_images
from model_builder import build_model, unfreeze_model
from train_utils import (
    compile_model, get_callbacks, train_model, 
    evaluate_model, get_predictions, calculate_metrics,
    save_training_results, compare_models as compare_model_metrics
)
from visualize import (
    plot_training_history, plot_confusion_matrix,
    plot_classification_report, plot_roc_curves,
    compare_models, plot_model_comparison_table,
    plot_per_class_performance, create_comprehensive_report
)


NameError: name '__file__' is not defined

## 2. Data Loading & Exploration

Load the dataset and visualize sample images.

In [None]:
# Count images in each class
print("\nDataset Statistics:")
print("-" * 40)

for split in ['training', 'validation']:
    print(f"\n{split.upper()}:")
    split_dir = os.path.join(config.SEVERITY_DATA_DIR, split)
    
    for class_name in config.SEVERITY_CLASS_NAMES:
        class_folder = [f for f in os.listdir(split_dir) if class_name.lower() in f.lower()]
        if class_folder:
            class_path = os.path.join(split_dir, class_folder[0])
            count = len(os.listdir(class_path))
            print(f"  {class_name}: {count} images")

print("\n" + "="*60)

In [None]:
# Load dataset for visualization (using EfficientNetB4 preprocessing)
print("\nLoading dataset for visualization...")
sample_train_ds, sample_val_ds, sample_info = create_tf_dataset('efficientnet_b4')

print("\nDataset Info:")
for key, value in sample_info.items():
    print(f"  {key}: {value}")

In [None]:
# Visualize sample images
fig = plot_sample_images(sample_train_ds, num_images=16)
plt.show()

## 3. Model Training

Train all models and compare their performance.

In [None]:
# Dictionary to store all results
all_results = {}
all_histories = {}
all_checkpoints = {}

# Set random seeds for reproducibility
np.random.seed(config.RANDOM_SEED)
tf.random.set_seed(config.RANDOM_SEED)

print("\n" + "="*80)
print("STARTING MODEL TRAINING")
print("="*80 + "\n")

In [None]:
# Train each model
for model_name in config.MODELS_TO_TRAIN:
    print("\n" + "="*80)
    print(f"TRAINING: {model_name.upper()}")
    print("="*80 + "\n")
    
    try:
        # 1. Create dataset with model-specific preprocessing
        print(f"Step 1: Creating dataset for {model_name}...")
        train_ds, val_ds, ds_info = create_tf_dataset(model_name)
        
        # 2. Build model
        print(f"\nStep 2: Building {model_name} model...")
        model, base_model = build_model(model_name, num_classes=config.SEVERITY_NUM_CLASSES)
        
        # 3. Compile model
        print(f"\nStep 3: Compiling model...")
        model = compile_model(model, learning_rate=config.INITIAL_LEARNING_RATE)
        
        # 4. Get callbacks
        print(f"\nStep 4: Setting up callbacks...")
        callbacks, checkpoint_path = get_callbacks(model_name, stage='initial')
        
        # 5. Train model
        print(f"\nStep 5: Training model...")
        start_time = datetime.now()
        history, checkpoint_path = train_model(
            model, train_ds, val_ds, 
            model_name=model_name,
            epochs=config.EPOCHS,
            callbacks_list=callbacks
        )
        training_time = (datetime.now() - start_time).total_seconds()
        
        # 6. Evaluate model
        print(f"\nStep 6: Evaluating model...")
        eval_results = evaluate_model(model, val_ds, model_name)
        
        # 7. Get predictions
        print(f"\nStep 7: Generating predictions...")
        y_true, y_pred, y_pred_proba = get_predictions(model, val_ds)
        
        # 8. Calculate detailed metrics
        print(f"\nStep 8: Calculating metrics...")
        detailed_metrics = calculate_metrics(y_true, y_pred, y_pred_proba)
        
        # 9. Combine all results
        combined_results = {
            **eval_results,
            **detailed_metrics,
            'training_time': training_time,
            'checkpoint_path': checkpoint_path
        }
        
        # 10. Save results
        print(f"\nStep 9: Saving results...")
        results_file = save_training_results(
            model_name, history, combined_results, checkpoint_path
        )
        
        # Store for comparison
        all_results[model_name] = combined_results
        all_histories[model_name] = history.history
        all_checkpoints[model_name] = checkpoint_path
        
        # 11. Plot training history
        print(f"\nStep 10: Plotting training history...")
        plot_training_history(history.history, model_name, save_dir=config.RESULTS_DIR)
        plt.show()
        
        # 12. Plot confusion matrix
        print(f"\nStep 11: Plotting confusion matrix...")
        plot_confusion_matrix(y_true, y_pred, model_name, save_dir=config.RESULTS_DIR)
        plt.show()
        
        # 13. Classification report
        print(f"\nStep 12: Generating classification report...")
        plot_classification_report(y_true, y_pred, model_name, save_dir=config.RESULTS_DIR)
        
        print(f"\n{'='*80}")
        print(f"‚úì {model_name.upper()} TRAINING COMPLETED SUCCESSFULLY!")
        print(f"  - Training Time: {training_time:.2f} seconds")
        print(f"  - Val Accuracy: {eval_results['accuracy']:.4f}")
        print(f"  - Val Loss: {eval_results['loss']:.4f}")
        print(f"{'='*80}\n")
        
        # Clear memory
        tf.keras.backend.clear_session()
        
    except Exception as e:
        print(f"\n{'='*80}")
        print(f"‚úó ERROR TRAINING {model_name.upper()}: {str(e)}")
        print(f"{'='*80}\n")
        continue

print("\n" + "="*80)
print("ALL MODELS TRAINING COMPLETED!")
print("="*80 + "\n")

## 4. Model Comparison

Compare all models and visualize the results.

In [None]:
# Display comparison table
print("\n" + "="*80)
print("MODEL PERFORMANCE COMPARISON")
print("="*80 + "\n")

comparison_df = compare_model_metrics(all_results)
print(comparison_df.to_string(index=False))
print("\n" + "="*80)

In [None]:
# Visualize model comparison
fig = compare_models(all_results, save_dir=config.RESULTS_DIR)
plt.show()

In [None]:
# Create comparison table visualization
comparison_df, fig = plot_model_comparison_table(all_results, save_dir=config.RESULTS_DIR)
plt.show()

In [None]:
# Plot per-class performance
fig = plot_per_class_performance(all_results, save_dir=config.RESULTS_DIR)
plt.show()

In [None]:
# Generate comprehensive report
create_comprehensive_report(all_results, save_dir=config.RESULTS_DIR)
print("\n‚úì Comprehensive report generated successfully!")

## 5. Best Model Selection

Identify and highlight the best performing model.

In [None]:
# Find best model
best_model_name = max(all_results.items(), key=lambda x: x[1]['accuracy'])[0]
best_accuracy = all_results[best_model_name]['accuracy']
best_checkpoint = all_checkpoints[best_model_name]

print("\n" + "="*80)
print("BEST MODEL SELECTION")
print("="*80)
print(f"\nüèÜ Best Model: {best_model_name.upper()}")
print(f"   Accuracy: {best_accuracy:.4f}")
print(f"   Precision: {all_results[best_model_name]['precision']:.4f}")
print(f"   Recall: {all_results[best_model_name]['recall']:.4f}")
print(f"   F1-Score: {all_results[best_model_name]['f1_score']:.4f}")
print(f"   Training Time: {all_results[best_model_name]['training_time']:.2f} seconds")
print(f"\n   Saved at: {best_checkpoint}")
print("="*80 + "\n")

In [None]:
# Display per-class metrics for best model
print(f"\nPer-Class Performance for {best_model_name.upper()}:")
print("-" * 60)

for class_name in config.SEVERITY_CLASS_NAMES:
    if class_name in all_results[best_model_name]['per_class_metrics']:
        metrics = all_results[best_model_name]['per_class_metrics'][class_name]
        print(f"\n{class_name.upper()}:")
        print(f"  Precision: {metrics['precision']:.4f}")
        print(f"  Recall: {metrics['recall']:.4f}")
        print(f"  F1-Score: {metrics['f1_score']:.4f}")
        print(f"  Support: {metrics['support']}")

print("\n" + "="*60)

## 6. Summary & Recommendations

Based on the training results, here are the key findings:

In [None]:
# Generate summary statistics
print("\n" + "="*80)
print("TRAINING SUMMARY")
print("="*80 + "\n")

print("Models Trained:", len(all_results))
print("\nAccuracy Range:")
accuracies = [results['accuracy'] for results in all_results.values()]
print(f"  Best: {max(accuracies):.4f}")
print(f"  Worst: {min(accuracies):.4f}")
print(f"  Average: {np.mean(accuracies):.4f}")
print(f"  Std Dev: {np.std(accuracies):.4f}")

print("\nTraining Time:")
times = [results['training_time'] for results in all_results.values()]
print(f"  Fastest: {min(times):.2f} seconds")
print(f"  Slowest: {max(times):.2f} seconds")
print(f"  Average: {np.mean(times):.2f} seconds")

print("\n" + "="*80)

## Next Steps

1. **Fine-tuning**: Unfreeze some layers of the best model for additional training
2. **Hyperparameter Optimization**: Try different learning rates, batch sizes, etc.
3. **Ensemble Methods**: Combine predictions from multiple models
4. **Deploy**: Save the best model for production use
5. **Test on New Data**: Validate on unseen vehicle damage images

## Recommendations

- **For Production**: Use the best accuracy model if accuracy is critical
- **For Mobile/Edge**: Use MobileNetV2 for resource-constrained environments
- **For Balance**: Use EfficientNetB4 for best accuracy/speed tradeoff

In [None]:
# Save summary to file
summary_path = os.path.join(config.RESULTS_DIR, 'training_summary.txt')

with open(summary_path, 'w') as f:
    f.write("="*80 + "\n")
    f.write("VEHICLE DAMAGE SEVERITY CLASSIFICATION - TRAINING SUMMARY\n")
    f.write("="*80 + "\n\n")
    
    f.write(f"Training Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write(f"Models Trained: {len(all_results)}\n\n")
    
    f.write("BEST MODEL\n")
    f.write("-" * 40 + "\n")
    f.write(f"Model: {best_model_name}\n")
    f.write(f"Accuracy: {best_accuracy:.4f}\n")
    f.write(f"Checkpoint: {best_checkpoint}\n\n")
    
    f.write("ALL MODELS PERFORMANCE\n")
    f.write("-" * 40 + "\n")
    f.write(comparison_df.to_string(index=False))
    f.write("\n\n")
    
    f.write("="*80 + "\n")

print(f"\n‚úì Training summary saved to: {summary_path}")