# Final Report Generator
## Waste Classification Model Comparison

**Purpose:** Aggregate and compare results from all trained models

**Models Evaluated:**
- EfficientNetB2 (TensorFlow)
- ConvNeXt-Tiny (PyTorch)
- ViT-B16 (PyTorch)

**Experiments:**
- Preprocessed Dataset
- Raw Dataset

---
## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os

# Set plot style.
# The seaborn style sheets were renamed to "seaborn-v0_8-*" in matplotlib 3.6;
# older installs only know the legacy name, so pick whichever one exists
# instead of failing on an unknown style. If neither is available the
# matplotlib default style is kept.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette("husl")

print("✓ Libraries imported successfully")

---
## 2. Load Results from CSV

In [None]:
# Load final_results.csv into `df`, the frame every later cell reads from.
results_file = 'final_results.csv'

if not os.path.exists(results_file):
    print(f"❌ Error: {results_file} not found!")
    print("Please run the model training notebooks first to generate results.")
    # Stop here: continuing would leave `df` undefined and the next cell
    # would fail with an unrelated NameError instead of this clear cause.
    raise FileNotFoundError(results_file)

df = pd.read_csv(results_file)
print(f"✓ Loaded {len(df)} results from {results_file}\n")
print("Preview of raw data:")
display(df.head(10))

---
## 3. Data Processing and Summary Statistics

In [None]:
# Convert the metric columns to float; entries that cannot be parsed become NaN.
numeric_cols = ['accuracy', 'precision', 'recall', 'f1']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Keep only the most recent row for each (model, experiment) pair.
# NOTE(review): 'timestamp' is sorted exactly as stored in the CSV, so this is
# chronological only if the stored values sort that way (e.g. ISO-8601
# strings) -- confirm against the training notebooks.
# .copy() makes df_latest an independent frame: a later cell adds an
# 'avg_score' column to it, which would otherwise raise pandas'
# SettingWithCopyWarning because groupby().tail() returns a slice.
df_latest = (
    df.sort_values('timestamp')
    .groupby(['model_name', 'experiment_type'])
    .tail(1)
    .copy()
)

print("\n" + "="*80)
print("LATEST RESULTS FOR EACH MODEL AND EXPERIMENT")
print("="*80)
display(df_latest[['model_name', 'experiment_type', 'accuracy', 'precision', 'recall', 'f1', 'loss', 'timestamp']])

# Per-model summary statistics across the experiments that were kept above.
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)
summary_stats = df_latest.groupby('model_name')[numeric_cols].agg(['mean', 'std', 'min', 'max'])
display(summary_stats)

---
## 4. Model Comparison Table
Compare all models across all metrics

In [None]:
# Pivot the latest results: one row per model, one column per
# (metric, experiment type) pair.
comparison_table = df_latest.pivot_table(
    values=['accuracy', 'precision', 'recall', 'f1'],
    index='model_name',
    columns='experiment_type',
)

print("\n" + "="*80)
print("MODEL COMPARISON: PREPROCESSED vs RAW DATASETS")
print("="*80)
display(comparison_table.round(4))

# Report the Preprocessed-minus-Raw difference per model, but only when both
# experiment types appear somewhere in the latest results.
experiment_types_seen = df_latest['experiment_type'].values
if 'Preprocessed' in experiment_types_seen and 'Raw' in experiment_types_seen:
    print("\n" + "="*80)
    print("IMPROVEMENT: PREPROCESSED vs RAW (Percentage Points)")
    print("="*80)

    is_preprocessed = df_latest['experiment_type'] == 'Preprocessed'
    is_raw = df_latest['experiment_type'] == 'Raw'

    for model in df_latest['model_name'].unique():
        belongs_to_model = df_latest['model_name'] == model
        prep_rows = df_latest[belongs_to_model & is_preprocessed]
        raw_rows = df_latest[belongs_to_model & is_raw]

        # A model needs a result for both experiments to be comparable.
        if prep_rows.empty or raw_rows.empty:
            continue

        print(f"\n{model}:")
        for metric in numeric_cols:
            delta = (prep_rows[metric].values[0] - raw_rows[metric].values[0]) * 100
            print(f"  {metric.capitalize():12s}: {delta:+.2f}%")

---
## 5. Best Performing Models
Identify top performers for each metric

In [None]:
print("\n" + "="*80)
print("BEST PERFORMING MODELS BY METRIC")
print("="*80)

# For each metric, show the single (model, experiment) row with the top score.
for metric in numeric_cols:
    print(f"\n{metric.upper()}:")
    scores = df_latest[metric]
    if not scores.notna().any():
        # Every value was coerced to NaN (or there are no rows): idxmax()
        # has no valid label to return, so report that instead of crashing.
        print("  No valid scores available")
        continue
    best_overall = df_latest.loc[scores.idxmax()]
    print(f"  Model: {best_overall['model_name']}")
    print(f"  Experiment: {best_overall['experiment_type']}")
    print(f"  Score: {best_overall[metric]:.4f}")
    print(f"  Timestamp: {best_overall['timestamp']}")

# Best overall model (average of all metrics).
# assign() returns a new frame, so the column is added without writing into
# a slice of the original results (which triggers SettingWithCopyWarning).
# Re-running the cell simply overwrites the existing 'avg_score' column.
df_latest = df_latest.assign(avg_score=df_latest[numeric_cols].mean(axis=1))
best_model = df_latest.loc[df_latest['avg_score'].idxmax()]

print("\n" + "="*80)
print("BEST OVERALL MODEL (Average of All Metrics)")
print("="*80)
print(f"Model:      {best_model['model_name']}")
print(f"Experiment: {best_model['experiment_type']}")
print(f"Accuracy:   {best_model['accuracy']:.4f}")
print(f"Precision:  {best_model['precision']:.4f}")
print(f"Recall:     {best_model['recall']:.4f}")
print(f"F1-Score:   {best_model['f1']:.4f}")
print(f"Avg Score:  {best_model['avg_score']:.4f}")
print("="*80)

---
## 6. Visualization: Model Performance Comparison

In [None]:
# Grouped bar charts: one panel per metric on a 2x2 grid, one bar group per
# model and one bar per experiment type within each group.
metrics_to_plot = ['accuracy', 'precision', 'recall', 'f1']
titles = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# axes.flat walks the grid row by row, matching the metric order above.
for panel, metric, title in zip(axes.flat, metrics_to_plot, titles):
    # Rows are models, columns are experiment types, cells are the metric.
    scores_by_experiment = df_latest.pivot_table(
        values=metric,
        index='model_name',
        columns='experiment_type',
    )

    scores_by_experiment.plot(kind='bar', ax=panel, width=0.8)
    panel.set_title(f'{title} Comparison', fontsize=14, fontweight='bold')
    panel.set_xlabel('Model', fontsize=12)
    panel.set_ylabel(title, fontsize=12)
    # Upper limit above 1.0 leaves headroom for the value labels.
    panel.set_ylim([0, 1.1])
    panel.legend(title='Experiment', fontsize=10)
    panel.grid(axis='y', alpha=0.3)
    panel.set_xticklabels(panel.get_xticklabels(), rotation=45, ha='right')

    # Annotate every bar with its value.
    for bar_group in panel.containers:
        panel.bar_label(bar_group, fmt='%.3f', fontsize=9)

plt.tight_layout()
plt.savefig('model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Comparison chart saved as 'model_comparison.png'")

---
## 7. Visualization: Radar Chart for Model Comparison

In [None]:
from math import pi

# One spoke per metric, evenly spaced around the circle. The first angle is
# repeated at the end so that every plotted polygon closes on itself.
categories = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
num_vars = len(categories)
angles = [n / float(num_vars) * 2 * pi for n in range(num_vars)]
angles = angles + [angles[0]]

# Column names in the same order as the spoke labels above.
metric_keys = ('accuracy', 'precision', 'recall', 'f1')
radial_ticks = [0.2, 0.4, 0.6, 0.8, 1.0]

# Side-by-side polar plots: left for Preprocessed, right for Raw.
fig, radar_axes = plt.subplots(1, 2, figsize=(16, 7), subplot_kw={'projection': 'polar'})

for exp_type, ax in zip(('Preprocessed', 'Raw'), radar_axes):
    # Draw one outlined, lightly filled polygon per model.
    for _, row in df_latest[df_latest['experiment_type'] == exp_type].iterrows():
        values = [row[key] for key in metric_keys]
        values.append(values[0])

        ax.plot(angles, values, 'o-', linewidth=2, label=row['model_name'])
        ax.fill(angles, values, alpha=0.15)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories, fontsize=11)
    ax.set_ylim(0, 1)
    ax.set_yticks(radial_ticks)
    ax.set_yticklabels([str(tick) for tick in radial_ticks], fontsize=9)
    ax.set_title(f'{exp_type} Dataset', fontsize=14, fontweight='bold', pad=20)
    # Anchor the legend outside the plot area so it does not cover the polygons.
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=10)
    ax.grid(True)

plt.tight_layout()
plt.savefig('radar_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Radar chart saved as 'radar_comparison.png'")

---
## 8. Visualization: Heatmap of All Results

In [None]:
# One heatmap per experiment type: models on the rows, metrics on the columns.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

for ax, exp_type in zip(axes, ('Preprocessed', 'Raw')):
    rows_for_experiment = df_latest.loc[df_latest['experiment_type'] == exp_type]
    heatmap_data = rows_for_experiment.set_index('model_name')[numeric_cols]

    # The colour scale is fixed to [0, 1] so both panels share the same mapping.
    sns.heatmap(
        heatmap_data,
        ax=ax,
        cmap='YlGnBu',
        vmin=0,
        vmax=1,
        annot=True,
        fmt='.4f',
        cbar_kws={'label': 'Score'},
    )
    ax.set_title(f'{exp_type} Dataset - Performance Heatmap', fontsize=14, fontweight='bold')
    ax.set_xlabel('Metrics', fontsize=12)
    ax.set_ylabel('Models', fontsize=12)

plt.tight_layout()
plt.savefig('heatmap_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Heatmap saved as 'heatmap_comparison.png'")

---
## 9. Export Summary Report

In [None]:
# Summary table: the latest result per model/experiment, ordered for reading.
report_columns = [
    'model_name', 'experiment_type', 'accuracy', 'precision', 'recall', 'f1', 'loss', 'timestamp'
]
summary_report = df_latest[report_columns].sort_values(['model_name', 'experiment_type'])

# (sheet name, frame, to_excel options) in the order the sheets are written.
# The two flat tables drop their index; the statistics and comparison tables
# keep theirs, because the model name lives in the index there.
workbook_sheets = [
    ('Summary', summary_report, {'index': False}),
    ('All Results', df, {'index': False}),
    ('Statistics', summary_stats, {}),
    ('Comparison', comparison_table, {}),
]

with pd.ExcelWriter('waste_classification_report.xlsx', engine='openpyxl') as writer:
    for sheet_name, frame, options in workbook_sheets:
        frame.to_excel(writer, sheet_name=sheet_name, **options)

print("✓ Excel report saved as 'waste_classification_report.xlsx'")

# The same summary table, also written as a flat CSV.
summary_report.to_csv('summary_report.csv', index=False)
print("✓ Summary CSV saved as 'summary_report.csv'")

---
## 10. Generate Markdown Report

In [None]:
# Generate markdown report

# DataFrame.to_markdown() needs the optional `tabulate` package. When it is
# not installed, fall back to a fixed-width table inside a code fence so the
# report can still be produced.
try:
    results_table = summary_report.to_markdown(index=False)
except ImportError:
    results_table = "```\n" + summary_report.to_string(index=False) + "\n```"

# The report is assembled as a list of sections and joined once at the end.
report_sections = [f"""
# Waste Classification Model Comparison Report

**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

**Project:** MMU TCV6313 Waste Classification (Plastic, Aluminum, Paper)

---

## 1. Executive Summary

This report presents a comprehensive comparison of three deep learning models for waste classification:
- **EfficientNetB2** (TensorFlow)
- **ConvNeXt-Tiny** (PyTorch)
- **ViT-B16** (PyTorch)

Each model was evaluated on two datasets:
- **Preprocessed Dataset**: Offline preprocessed images
- **Raw Dataset**: Original unprocessed images

---

## 2. Best Overall Model

- **Model:** {best_model['model_name']}
- **Experiment:** {best_model['experiment_type']}
- **Accuracy:** {best_model['accuracy']:.4f}
- **Precision:** {best_model['precision']:.4f}
- **Recall:** {best_model['recall']:.4f}
- **F1-Score:** {best_model['f1']:.4f}

---

## 3. Results Summary

{results_table}

---

## 4. Key Findings

### Preprocessed vs Raw Dataset
"""]

# One entry per model that has a result for both experiment types.
for model in df_latest['model_name'].unique():
    prep_data = df_latest[(df_latest['model_name'] == model) & (df_latest['experiment_type'] == 'Preprocessed')]
    raw_data = df_latest[(df_latest['model_name'] == model) & (df_latest['experiment_type'] == 'Raw')]

    if prep_data.empty or raw_data.empty:
        continue

    prep_acc = prep_data['accuracy'].values[0]
    raw_acc = raw_data['accuracy'].values[0]
    # Difference of two accuracies scaled by 100 is a percentage-point
    # change, not a relative percentage, so it is labelled accordingly.
    improvement = (prep_acc - raw_acc) * 100

    report_sections.append(f"""
**{model}:**
- Preprocessed: {prep_acc:.4f}
- Raw: {raw_acc:.4f}
- Improvement: {improvement:+.2f} percentage points
""")

report_sections.append("""

---

## 5. Recommendations

Based on the evaluation results:

1. **For Production Deployment:** Use the best overall model for optimal performance
2. **For Resource-Constrained Environments:** Consider the trade-off between accuracy and model size
3. **Data Preprocessing:** The results demonstrate the importance of proper preprocessing

---

## 6. Visualizations

See the following generated charts:
- `model_comparison.png` - Bar charts comparing all metrics
- `radar_comparison.png` - Radar charts for visual comparison
- `heatmap_comparison.png` - Heatmaps showing performance across all metrics

---

*Report generated automatically from training results*
""")

markdown_report = "".join(report_sections)

# Save markdown report. The encoding is explicit so the output does not
# depend on the platform's default text encoding.
with open('FINAL_REPORT.md', 'w', encoding='utf-8') as f:
    f.write(markdown_report)

print("✓ Markdown report saved as 'FINAL_REPORT.md'")
print("\n" + "="*80)
print("ALL REPORTS GENERATED SUCCESSFULLY!")
print("="*80)
print("\nGenerated files:")
print("  - waste_classification_report.xlsx")
print("  - summary_report.csv")
print("  - FINAL_REPORT.md")
print("  - model_comparison.png")
print("  - radar_comparison.png")
print("  - heatmap_comparison.png")
print("="*80)