# 05 - Results Analysis

This notebook provides a comprehensive analysis of the experimental results.

## Contents:
1. Load data and run experiment
2. Results tables (individual and ensemble models)
3. Statistical comparison
4. Visualization
5. Save results
6. Conclusions

In [None]:
import sys
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Patch

warnings.filterwarnings('ignore')
# Make the project root importable before pulling in the local `src` package.
sys.path.append('..')

from src.data.data_loader import DataLoader
from src.data.preprocessor import DataPreprocessor
from src.models.ensemble_model import EnsembleModel
from src.models.cbr_model import CBRModel
from src.models.cocomo_model import COCOMOModel
from src.models.ml_models import XGBoostModel, ANNModel, KNNModel, SVRModel
from src.evaluation.metrics import calculate_all_metrics
from src.evaluation.cross_validation import CrossValidator

%matplotlib inline
# Shared look for every figure in this notebook: whitegrid theme first,
# then the default canvas size and font size on top of it.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': [12, 6],
    'font.size': 11,
})

## 1. Load Data and Run Experiment

In [None]:
# Read the raw 'cocomo81' records and run them through the project's
# preprocessing pipeline (scale=True), which yields X and y.
loader = DataLoader('cocomo81')
df = loader.load_raw_data()

preprocessor = DataPreprocessor()
X, y = preprocessor.preprocess_pipeline(df, scale=True)

# Short sanity summary of what was loaded.
n_samples, n_features = len(y), X.shape[1]
print("Dataset: COCOMO81")
print(f"Samples: {n_samples}")
print(f"Features: {n_features}")

In [None]:
# One k-fold validator (5 splits) is shared by every model evaluated below.
cv = CrossValidator(n_splits=5, cv_type='kfold')

print(
    "Running 5-Fold Cross-Validation...",
    "This may take a few minutes...",
    sep="\n",
)

In [None]:
# Stand-alone estimators, keyed by the label shown in the results table.
individual_models = dict(
    CBR=CBRModel(),
    COCOMO=COCOMOModel(use_nn_correction=False),
    XGBoost=XGBoostModel(),
    ANN=ANNModel(),
    KNN=KNNModel(),
    SVR=SVRModel(),
)

# Results-table column -> key inside the validator's 'metrics' dict.
# Only the training time is spelled differently on the two sides.
metric_columns = {
    'MAE': 'MAE',
    'RMSE': 'RMSE',
    'MMRE': 'MMRE',
    'MdMRE': 'MdMRE',
    'PRED(0.25)': 'PRED(0.25)',
    'Training Time': 'Training_Time',
}

individual_results = []

for label, estimator in individual_models.items():
    print(f"Evaluating {label}...")
    scores = cv.evaluate_model(estimator, X, y)['metrics']

    row = {'Model': label}
    row.update({column: scores[key] for column, key in metric_columns.items()})
    individual_results.append(row)

# One row per model, built once from the collected records.
individual_df = pd.DataFrame(individual_results)
print("\n✓ Individual models evaluated!")

In [None]:
# Each ensemble is built with one ML learner name and the median combination
# rule; the row label 'CBR+COCOMO+<learner>' names the variant.
ml_variants = ['XGBoost', 'ANN', 'KNN', 'SVR']
ensemble_results = []

for ml_name in ml_variants:
    print(f"Evaluating Ensemble with {ml_name}...")

    ensemble = EnsembleModel(combination_rule='median', ml_model_name=ml_name)
    fold_metrics = cv.evaluate_model(ensemble, X, y)['metrics']

    record = {'Model': f'CBR+COCOMO+{ml_name}'}
    for metric in ('MAE', 'RMSE', 'MMRE', 'MdMRE', 'PRED(0.25)'):
        record[metric] = fold_metrics[metric]
    # The validator reports this key with an underscore; the table uses a space.
    record['Training Time'] = fold_metrics['Training_Time']
    ensemble_results.append(record)

ensemble_df = pd.DataFrame(ensemble_results)
print("\n✓ Ensemble models evaluated!")

## 2. Results Tables

In [None]:
# Plain-text table of the stand-alone models' metrics, framed by a banner.
banner = "=" * 70
print(banner, "INDIVIDUAL MODELS RESULTS", banner, sep="\n")
print(individual_df.to_string(index=False))

In [None]:
# Plain-text table of the ensemble variants' metrics, framed by a banner.
rule = "=" * 70
print(f"{rule}\nENSEMBLE MODELS RESULTS\n{rule}")
print(ensemble_df.to_string(index=False))

## 3. Statistical Comparison

In [None]:
# The "best" model of each family is the row with the lowest MAE.
best_individual_idx = individual_df['MAE'].idxmin()
best_ensemble_idx = ensemble_df['MAE'].idxmin()

best_individual = individual_df.loc[best_individual_idx]
best_ensemble = ensemble_df.loc[best_ensemble_idx]

print("BEST MODELS COMPARISON")
print("=" * 50)

# Same three headline metrics for each winner.
for family, winner in (('Individual', best_individual), ('Ensemble', best_ensemble)):
    print(f"\nBest {family} Model: {winner['Model']}")
    print(f"  MAE: {winner['MAE']:.2f}")
    print(f"  MMRE: {winner['MMRE']:.4f}")
    print(f"  PRED(0.25): {winner['PRED(0.25)']:.4f}")

In [None]:
# Calculate improvement of the best ensemble over the best individual model.


def _relative_gain(baseline, candidate, higher_is_better=False):
    """Return the percentage by which `candidate` beats `baseline`.

    A positive result always means the candidate (ensemble) is better:
    for error metrics that is a reduction, for accuracy metrics an increase.

    Returns NaN when the baseline is 0, because a relative change is
    undefined there (the unguarded division would yield inf/NaN, which is
    easy to hit for PRED(0.25) when no prediction lands within 25%).
    """
    if baseline == 0:
        return np.nan
    delta = (candidate - baseline) if higher_is_better else (baseline - candidate)
    return (delta / baseline) * 100


def _report_gain(metric, gain, better_arrow, worse_arrow):
    """Print one line describing `gain` (percent, positive = ensemble better)."""
    if np.isnan(gain):
        print(f"{metric} Improvement: n/a (best individual {metric} is zero or undefined)")
    elif gain > 0:
        print(f"{metric} Improvement: {better_arrow} {gain:.2f}% (Ensemble better)")
    elif gain < 0:
        print(f"{metric} Difference: {worse_arrow} {-gain:.2f}% (Individual better)")
    else:
        # An exact tie must not be reported as a win for either side.
        print(f"{metric} Difference: 0.00% (tie)")


mae_improvement = _relative_gain(best_individual['MAE'], best_ensemble['MAE'])
mmre_improvement = _relative_gain(best_individual['MMRE'], best_ensemble['MMRE'])
pred_improvement = _relative_gain(
    best_individual['PRED(0.25)'], best_ensemble['PRED(0.25)'], higher_is_better=True
)

print("\nIMPROVEMENT ANALYSIS")
print("="*50)
# Error metrics improve downward (↓); PRED(0.25) improves upward (↑).
_report_gain('MAE', mae_improvement, '↓', '↑')
_report_gain('MMRE', mmre_improvement, '↓', '↑')
_report_gain('PRED(0.25)', pred_improvement, '↑', '↓')

## 4. Visualization

In [None]:
# Combined comparison chart: one bar panel per headline metric, with
# individual and ensemble models side by side and colour-coded by type.
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
ax1, ax2, ax3 = axes

# Prepare data
all_models = pd.concat([
    individual_df[['Model', 'MAE', 'MMRE', 'PRED(0.25)']].assign(Type='Individual'),
    ensemble_df[['Model', 'MAE', 'MMRE', 'PRED(0.25)']].assign(Type='Ensemble')
])

# `colors` is also read by the training-time chart in the next cell.
colors = {'Individual': '#3498db', 'Ensemble': '#e74c3c'}
bar_colors = [colors[t] for t in all_models['Type']]
x = range(len(all_models))

# (metric column, panel title) for each subplot, left to right.
panels = [
    ('MAE', 'Mean Absolute Error (Lower is Better)'),
    ('MMRE', 'Mean Magnitude of Relative Error (Lower is Better)'),
    ('PRED(0.25)', 'Prediction Accuracy within 25% (Higher is Better)'),
]
for ax, (metric, title) in zip(axes, panels):
    ax.bar(x, all_models[metric], color=bar_colors)
    ax.set_xticks(x)
    ax.set_xticklabels(all_models['Model'], rotation=45, ha='right')
    ax.set_ylabel(metric)
    ax.set_title(title)

# Reference line on the MAE panel. Keep the handle so the line shows up in the
# legend; a label alone is never displayed unless a legend picks it up.
best_ensemble_line = ax1.axhline(
    y=best_ensemble['MAE'], color='red', linestyle='--', alpha=0.7,
    label='Best Ensemble MAE',
)

# Add legend
legend_elements = [Patch(facecolor=colors['Individual'], label='Individual'),
                   Patch(facecolor=colors['Ensemble'], label='Ensemble'),
                   best_ensemble_line]
fig.legend(handles=legend_elements, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.02))

# savefig does not create missing directories, so make sure the target exists
# (a fresh checkout usually has no reports/figures folder).
figure_path = Path('../reports/figures/final_comparison.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)

plt.tight_layout()
plt.savefig(figure_path, dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Training time comparison across individual and ensemble models.
# Relies on the `colors` mapping defined in the comparison-chart cell above.
fig, ax = plt.subplots(figsize=(12, 6))

all_times = pd.concat([
    individual_df[['Model', 'Training Time']].assign(Type='Individual'),
    ensemble_df[['Model', 'Training Time']].assign(Type='Ensemble')
])

x = range(len(all_times))
bars = ax.bar(x, all_times['Training Time'], color=[colors[t] for t in all_times['Type']])
ax.set_xticks(x)
ax.set_xticklabels(all_times['Model'], rotation=45, ha='right')
ax.set_ylabel('Training Time (seconds)')
ax.set_title('Model Training Time Comparison')

# savefig does not create missing directories; create the target folder first
# so the cell also works on a fresh checkout.
figure_path = Path('../reports/figures/training_time.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)

plt.tight_layout()
plt.savefig(figure_path, dpi=150)
plt.show()

In [None]:
# Heatmap of all metrics: cell colour shows how each model ranks within a
# metric column, while the annotation shows the raw metric value.


def _normalize_for_heatmap(metrics, higher_is_better=('PRED(0.25)',)):
    """Min-max scale each metric column to [0, 1] with 0 = best and 1 = worst.

    Columns listed in `higher_is_better` are flipped after scaling so that the
    reversed red/green colormap means the same thing in every column (green is
    good). Without the flip, the model with the best PRED(0.25) is drawn red.

    A column whose values are all equal has zero range; it is set to the
    neutral midpoint 0.5 instead of the NaN that 0/0 would produce.
    """
    low = metrics.min()
    span = metrics.max() - low
    scaled = (metrics - low) / span.where(span != 0)
    for column in span.index[span == 0]:
        scaled[column] = 0.5
    for column in higher_is_better:
        if column in scaled.columns:
            scaled[column] = 1 - scaled[column]
    return scaled


fig, axes = plt.subplots(1, 2, figsize=(14, 5))
heatmap_columns = ['MAE', 'RMSE', 'MMRE', 'MdMRE', 'PRED(0.25)']

# Individual models heatmap data
individual_metrics = individual_df.set_index('Model')[heatmap_columns]
individual_norm = _normalize_for_heatmap(individual_metrics)

# Ensemble models heatmap data
ensemble_metrics = ensemble_df.set_index('Model')[heatmap_columns]
ensemble_norm = _normalize_for_heatmap(ensemble_metrics)

heatmap_panels = [
    (axes[0], individual_norm, individual_metrics, 'Individual Models Metrics'),
    (axes[1], ensemble_norm, ensemble_metrics, 'Ensemble Models Metrics'),
]
for ax, scaled, raw, title in heatmap_panels:
    # Fixed vmin/vmax keep the colour scale identical across both panels.
    sns.heatmap(
        scaled, annot=raw.round(3), fmt='', cmap='RdYlGn_r', vmin=0, vmax=1,
        cbar_kws={'label': 'Normalized score (0 = best, 1 = worst)'}, ax=ax,
    )
    ax.set_title(title)

# savefig does not create missing directories; create the target folder first.
figure_path = Path('../reports/figures/metrics_heatmap.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)

plt.tight_layout()
plt.savefig(figure_path, dpi=150)
plt.show()

## 5. Save Results

In [None]:
# Save to Excel: one sheet per results table plus a short summary sheet.
# `output_file` stays a plain string because the final cell prints it.
output_file = '../experiments/results/cocomo81_final_results.xlsx'

# ExcelWriter cannot create missing parent folders, so create them up front
# (a fresh checkout usually has no experiments/results directory).
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    individual_df.to_excel(writer, sheet_name='Individual_Models', index=False)
    ensemble_df.to_excel(writer, sheet_name='Ensemble_Models', index=False)

    # Summary sheet: the MAE winners of each family and the relative MAE gain
    # computed in the improvement-analysis cell.
    summary_data = {
        'Metric': ['Best Individual Model', 'Best Individual MAE', 'Best Ensemble Model', 'Best Ensemble MAE', 'MAE Improvement (%)'],
        'Value': [best_individual['Model'], best_individual['MAE'], best_ensemble['Model'], best_ensemble['MAE'], mae_improvement]
    }
    pd.DataFrame(summary_data).to_excel(writer, sheet_name='Summary', index=False)

print(f"Results saved to: {output_file}")

## 6. Conclusions

### Key Findings:

1. **Heterogeneous Ensemble Effectiveness**: The combination of CBR, COCOMO-II, and ML models provides robust predictions by leveraging diverse estimation approaches.

2. **Best Configuration**: CBR + COCOMO + XGBoost with the median combination rule typically provides the best balance of accuracy and computational efficiency; verify this against the results tables and the training-time chart above for the current run.

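3. **Combination Rule**: Median combination is expected to be more robust than linear or mean combinations because it is less sensitive to outlier predictions. Note that this notebook evaluates only the median rule, so this comparison is not verified here.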

4. **Practical Implications**:
   - Ensemble models reduce the risk of poor estimates from any single model
   - The approach is applicable to different datasets
   - Training time overhead is acceptable for improved accuracy

### Recommendations:
- Use **CBR + COCOMO + XGBoost** ensemble for production systems
- Apply **median combination** for robustness
- Consider **hyperparameter tuning** for further improvements

In [None]:
# Closing banner plus a reminder of where the artefacts were written.
closing_rule = "=" * 70
print(closing_rule, "EXPERIMENT COMPLETED SUCCESSFULLY!", closing_rule, sep="\n")

print(f"\nResults saved to: {output_file}")
print("Figures saved to: ../reports/figures/")

print("\nKey files generated:")
for figure_name in ("final_comparison.png", "training_time.png", "metrics_heatmap.png"):
    print(f"  - {figure_name}")