# Final Results Visualization and Model Comparison

This notebook compares all models trained throughout the project and provides comprehensive visualizations for the final report and presentation.

## 1. Import Libraries

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch
# Suppress all Python warnings for the rest of the notebook session
warnings.filterwarnings('ignore')

# Plot appearance: apply the dark-grid style and the "husl" palette first,
# then override the default figure size and font size on top of them.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 11,
})

print("Setup complete!")

## 2. Collect Model Results

**Note:** This section compiles results from all previous notebooks. You'll need to either:
1. Run all previous notebooks and save their results to CSV files, OR
2. Manually enter the performance metrics from each notebook

In [None]:
# Performance metrics for every model trained in the earlier notebooks.
# Nothing is loaded from disk here: this table is a template that must be
# filled in by hand with the scores reported by notebooks 02-04.
METRIC_COLUMNS = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

# Each entry: (model name, model type, accuracy, precision, recall, F1-score)
model_scores = [
    # Machine Learning Models (from notebook 02)
    ('Logistic Regression', 'ML', 0.000, 0.000, 0.000, 0.000),
    ('SVM', 'ML', 0.000, 0.000, 0.000, 0.000),
    ('Random Forest', 'ML', 0.000, 0.000, 0.000, 0.000),
    # Deep Learning Models (from notebook 03)
    ('LSTM', 'DL', 0.000, 0.000, 0.000, 0.000),
    ('Bidirectional LSTM', 'DL', 0.000, 0.000, 0.000, 0.000),
    ('GRU', 'DL', 0.000, 0.000, 0.000, 0.000),
    ('Bidirectional GRU', 'DL', 0.000, 0.000, 0.000, 0.000),
    # BERT Model (from notebook 04)
    ('BERT', 'Transformer', 0.000, 0.000, 0.000, 0.000),
]

# Build one record per model; dict insertion order fixes the column order
# as Model, Type, then the four metrics.
results_data = [
    {'Model': model_name, 'Type': model_type, **dict(zip(METRIC_COLUMNS, scores))}
    for model_name, model_type, *scores in model_scores
]

# Create DataFrame
results_df = pd.DataFrame(results_data)

print("Model Results Summary:")
print("="*80)
print(results_df.to_string(index=False))

# A row whose four metrics are all exactly zero is still an unfilled
# placeholder; only nag the reader while at least one such row remains.
has_placeholders = bool((results_df[METRIC_COLUMNS] == 0).all(axis=1).any())
if has_placeholders:
    print("\n⚠️  NOTE: Replace 0.000 values with actual results from previous notebooks!")

## 3. Overall Model Comparison - Bar Charts

In [None]:
# 2x2 grid of horizontal bar charts, one panel per metric, each panel ranking
# all models by that metric. The figure is also written to disk as a PNG.
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']

# savefig does not create missing directories, so make sure the target exists
all_models_chart_path = '../visuals/charts/all_models_comparison.png'
os.makedirs(os.path.dirname(all_models_chart_path), exist_ok=True)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

for ax, metric, color in zip(axes.flatten(), metrics, colors):
    # Sort by metric
    sorted_df = results_df.sort_values(metric, ascending=False)

    # Create bar plot
    bars = ax.barh(sorted_df['Model'], sorted_df[metric], color=color, alpha=0.7, edgecolor='black')

    ax.set_title(f'{metric} Comparison Across All Models', fontsize=14, fontweight='bold')
    ax.set_xlabel(metric, fontsize=12)
    ax.set_ylabel('Model', fontsize=12)
    ax.set_xlim([0, 1])
    ax.grid(axis='x', alpha=0.3)

    # Add value labels at the tip of each bar, vertically centred on the bar
    for bar in bars:
        bar_length = bar.get_width()
        ax.text(bar_length, bar.get_y() + bar.get_height()/2.,
                f'{bar_length:.4f}',
                ha='left', va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig(all_models_chart_path, dpi=300, bbox_inches='tight')
plt.show()

## 4. Comparison by Model Type

In [None]:
# Two side-by-side panels:
#   left  - F1-Score of every model, bars coloured by model type
#   right - mean of each metric per model type
# The figure is also written to disk as a PNG.

# savefig does not create missing directories, so make sure the target exists
by_type_chart_path = '../visuals/charts/comparison_by_type.png'
os.makedirs(os.path.dirname(by_type_chart_path), exist_ok=True)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# F1-Score comparison grouped by type
width = 0.6

# One fixed colour per model type; a type missing from this map raises KeyError
colors_map = {'ML': '#3498db', 'DL': '#e74c3c', 'Transformer': '#2ecc71'}
bar_colors = [colors_map[t] for t in results_df['Type']]

ax = axes[0]
bars = ax.bar(results_df['Model'], results_df['F1-Score'], width, color=bar_colors, alpha=0.7, edgecolor='black')
ax.set_title('F1-Score by Model and Type', fontsize=14, fontweight='bold')
ax.set_ylabel('F1-Score', fontsize=12)
ax.set_xlabel('Model', fontsize=12)
ax.set_ylim([0, 1])
ax.grid(axis='y', alpha=0.3)
ax.tick_params(axis='x', rotation=45)

# Add value labels centred above each bar
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{height:.3f}',
            ha='center', va='bottom', fontsize=9)

# Create legend: bars carry no labels of their own, so build proxy patches
legend_elements = [Patch(facecolor=colors_map[t], label=t) for t in colors_map.keys()]
ax.legend(handles=legend_elements, title='Model Type', loc='lower right')

# Average metrics by type
type_avg = results_df.groupby('Type')[['Accuracy', 'Precision', 'Recall', 'F1-Score']].mean()

ax2 = axes[1]
type_avg.plot(kind='bar', ax=ax2, alpha=0.7, edgecolor='black')
ax2.set_title('Average Metrics by Model Type', fontsize=14, fontweight='bold')
ax2.set_ylabel('Score', fontsize=12)
ax2.set_xlabel('Model Type', fontsize=12)
ax2.set_ylim([0, 1])
ax2.grid(axis='y', alpha=0.3)
ax2.legend(title='Metrics', loc='lower right')
ax2.tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.savefig(by_type_chart_path, dpi=300, bbox_inches='tight')
plt.show()

## 5. Heatmap of All Metrics

In [None]:
# Heatmap with one row per model and one column per metric, colour-scaled
# over the fixed range 0-1. The figure is also written to disk as a PNG.

# savefig does not create missing directories, so make sure the target exists
heatmap_path = '../visuals/charts/performance_heatmap.png'
os.makedirs(os.path.dirname(heatmap_path), exist_ok=True)

# Prepare data for heatmap: models become the row index, metrics the columns
heatmap_data = results_df.set_index('Model')[['Accuracy', 'Precision', 'Recall', 'F1-Score']]

# Explicit figure/axes instead of the implicit pyplot "current figure"
fig, ax = plt.subplots(figsize=(10, 8))

sns.heatmap(heatmap_data, annot=True, fmt='.4f', cmap='RdYlGn',
            cbar_kws={'label': 'Score'}, vmin=0, vmax=1,
            linewidths=0.5, linecolor='gray', ax=ax)

ax.set_title('Performance Heatmap - All Models and Metrics', fontsize=14, fontweight='bold', pad=20)
ax.set_ylabel('Model', fontsize=12)
ax.set_xlabel('Metric', fontsize=12)
fig.tight_layout()
fig.savefig(heatmap_path, dpi=300, bbox_inches='tight')
plt.show()

## 6. Best Model Selection and Summary Table

In [None]:
# Report the top model for each metric, then the overall winner by F1-Score,
# and finally write the full table (sorted by F1-Score) to a CSV report.
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
rule = "="*80

# Find best model for each metric
print(rule)
print("BEST MODELS BY METRIC")
print(rule)

for metric in metric_columns:
    # idxmax returns the first row label holding the maximum, so ties
    # resolve to the model listed first in results_df
    top_row = results_df.loc[results_df[metric].idxmax()]
    print(f"\n{metric}:")
    print(f"  Best Model: {top_row['Model']}")
    print(f"  Score: {top_row[metric]:.4f}")

# Overall best model (by F1-Score)
best_model_idx = results_df['F1-Score'].idxmax()
best_row = results_df.loc[best_model_idx]
best_model_name = best_row['Model']
best_model_type = best_row['Type']

print("\n" + rule)
print("OVERALL BEST MODEL (by F1-Score)")
print(rule)
print(f"Model: {best_model_name}")
print(f"Type: {best_model_type}")
for metric in metric_columns:
    print(f"{metric}: {best_row[metric]:.4f}")

# Save final results; to_csv does not create missing directories, so make
# sure the reports folder exists first
report_path = '../reports/final_model_comparison.csv'
os.makedirs(os.path.dirname(report_path), exist_ok=True)
results_df_sorted = results_df.sort_values('F1-Score', ascending=False)
results_df_sorted.to_csv(report_path, index=False)
print("\n✓ Results saved to reports/final_model_comparison.csv")

## 7. Executive Summary and Insights

### Project Overview:
This project implemented comprehensive sentiment analysis on the Sentiment140 dataset using multiple approaches:
- **Traditional Machine Learning**: Logistic Regression, SVM, Random Forest
- **Deep Learning (RNNs)**: LSTM, Bidirectional LSTM, GRU, Bidirectional GRU
- **Transfer Learning (Transformers)**: BERT fine-tuning
- **Unsupervised Learning**: K-Means clustering, LDA topic modeling, t-SNE visualization

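### Key Findings:

> **Note:** The points below describe the trends generally expected for these model families. Verify each one against the actual metrics entered in Section 2 before citing it in the final report.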

1. **Model Performance Progression:**
   - ML models provide strong baselines with fast training
   - Deep learning (LSTM/GRU) models capture sequential patterns better
   - Transformer models (BERT) achieve state-of-the-art performance
   - Trade-off between performance and computational cost

2. **Best Performing Model:**
   - BERT typically achieves the highest accuracy, owing to its large-scale pre-training
   - BiLSTM/BiGRU often close second with faster inference
   - Traditional ML still competitive for resource-constrained scenarios

3. **Model Selection Criteria:**
   - **Best Overall Model**: Choose based on F1-Score ranking (the criterion used in Section 6)
   - **Production Deployment**: Consider inference speed and model size
   - **Real-time Applications**: ML or GRU models for low latency
   - **Maximum Performance**: BERT for highest accuracy

### Recommendations:

**For Production:**
- Use BERT if GPU resources available and accuracy is critical
- Use BiGRU for balanced performance/speed trade-off
- Use Logistic Regression for resource-constrained environments

**For Further Improvement:**
- Ensemble methods combining top models
- Hyperparameter tuning of best models
- Data augmentation techniques
- Domain-specific fine-tuning

### Deliverables:
✓ Trained models saved in `models/` directory  
✓ Visualizations in `visuals/` directory  
✓ Performance metrics in `reports/` directory  
✓ Complete notebooks documenting all experiments