# 07 — Results Visualization
Generates all report-ready figures for the ISyE 6740 final paper.

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# The figures folder must exist before any cell below calls savefig().
os.makedirs("../figures", exist_ok=True)

# Notebook-wide plotting defaults: whitegrid theme, 150-dpi rendering.
plt.style.use('seaborn-v0_8-whitegrid')
matplotlib.rcParams.update({'figure.dpi': 150})

## 1. Load All Results

In [None]:
# Dataset key -> parquet file that backs it.  The order is the load order,
# so the progress messages appear in the same sequence as before.
PARQUET_SOURCES = {
    'price_matrix': "../data/raw/price_matrix.parquet",
    'returns_matrix': "../data/raw/returns_matrix.parquet",
    'fundamentals': "../data/raw/fundamentals.parquet",
    'feature_matrix': "../data/processed/feature_matrix.parquet",
    'pca_data': "../data/processed/pca_data.parquet",
    'cluster_labels': "../data/processed/cluster_labels.parquet",
    'forecast_results': "../data/processed/forecast_results.parquet",
    'evaluation_summary': "../data/processed/evaluation_summary.parquet",
}

# Only datasets that loaded successfully get a key here; every later cell
# checks for membership before using one.
loaded_data = {}

for dataset_name, parquet_path in PARQUET_SOURCES.items():
    # Best-effort load: a missing or unreadable file is reported and skipped
    # so the remaining figures can still be produced.
    try:
        loaded_data[dataset_name] = pd.read_parquet(parquet_path)
    except Exception as e:
        print(f"Could not load {dataset_name}: {e}")
    else:
        print(f"Loaded {dataset_name}")

## 2. Figure 1: Stock Price Overview

In [None]:
# Figure 1: price paths of the first six columns, rebased to 100.
if 'price_matrix' in loaded_data:
    price_df = loaded_data['price_matrix']

    if price_df.empty:
        # iloc[0] below would raise on a frame with no rows, and there is
        # nothing to draw anyway.
        print("price_matrix is empty; skipping Figure 1")
    else:
        # Rebase each series to 100 at its own first available price.
        # Back-filling and then taking row 0 yields every column's first
        # non-NaN value, so a column whose first row is NaN is not turned
        # into an all-NaN line.  With no leading NaNs this is the same as
        # dividing by the first row.
        base_prices = price_df.bfill().iloc[0]
        normalized = (price_df / base_prices) * 100

        # Plot the first six columns (no ranking or sampling is applied).
        sample_cols = normalized.columns[:6]

        fig, ax = plt.subplots(figsize=(12, 6))
        for col in sample_cols:
            ax.plot(normalized.index, normalized[col], label=col, linewidth=2)

        ax.set_title('Stock Price Overview (Normalized to Base 100)', fontsize=14, fontweight='bold')
        ax.set_xlabel('Date')
        ax.set_ylabel('Normalized Price')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('../figures/fig1_price_overview.png', dpi=300, bbox_inches='tight')
        plt.show()
        print("Saved fig1_price_overview.png")
else:
    print("price_matrix not available")

## 3. Figure 2: PCA Explained Variance

In [None]:
# Figure 2: cumulative explained variance of a PCA refit on the feature matrix.
if not ('pca_data' in loaded_data and 'feature_matrix' in loaded_data):
    print("PCA data not available")
else:
    pca_df = loaded_data['pca_data']

    try:
        # Project-local class; imported here so a missing module is reported
        # by the handler below instead of aborting the notebook.
        from src.eda_analysis import StockPCA

        feature_df = loaded_data['feature_matrix']
        pca = StockPCA()
        pca.fit(feature_df)

        fig, ax = plt.subplots(figsize=(10, 6))

        # Running total of the per-component variance ratios, plotted
        # against the component count (1-based).
        cumsum = np.cumsum(pca.explained_variance_ratio_)
        component_counts = range(1, len(cumsum) + 1)
        ax.plot(component_counts, cumsum, 'bo-', linewidth=2, markersize=8)

        ax.set_title('PCA Explained Variance', fontsize=14, fontweight='bold')
        ax.set_xlabel('Number of Components')
        ax.set_ylabel('Cumulative Explained Variance')
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('../figures/fig2_pca_variance.png', dpi=300, bbox_inches='tight')
        plt.show()
        print("Saved fig2_pca_variance.png")
    except Exception as e:
        print(f"Error creating PCA plot: {e}")

## 4. Figure 3: Stocks in PCA Space by Sector

In [None]:
# Figure 3: scatter of the PC1/PC2 columns of the PCA table.
# NOTE(review): the output file name mentions sectors, but no sector
# information is used here; all points are drawn in a single colour.
if 'pca_data' in loaded_data:
    pca_df = loaded_data['pca_data']

    # Check the columns before opening a figure, so a table without
    # PC1/PC2 does not leave an empty figure behind, and say why the
    # plot was skipped instead of silently doing nothing.
    if 'PC1' in pca_df.columns and 'PC2' in pca_df.columns:
        fig, ax = plt.subplots(figsize=(10, 8))

        ax.scatter(pca_df['PC1'], pca_df['PC2'], s=100, alpha=0.6)
        ax.set_title('Stocks in PCA Space', fontsize=14, fontweight='bold')
        ax.set_xlabel('PC1')
        ax.set_ylabel('PC2')
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('../figures/fig3_pca_sectors.png', dpi=300, bbox_inches='tight')
        plt.show()
        print("Saved fig3_pca_sectors.png")
    else:
        print("PCA data is missing PC1/PC2 columns; skipping Figure 3")
else:
    print("PCA data not available")

## 5. Figure 4: Cluster Analysis

In [None]:
# Figure 4: PC1/PC2 scatter coloured by the k-means cluster label.
if 'cluster_labels' in loaded_data and 'pca_data' in loaded_data:
    cluster_df = loaded_data['cluster_labels']
    pca_df = loaded_data['pca_data']

    has_required_columns = (
        'kmeans_cluster' in cluster_df.columns
        and 'PC1' in pca_df.columns
        and 'PC2' in pca_df.columns
    )

    # Check the columns before opening a figure, so a missing column does
    # not leave an empty figure behind or skip the plot without a message.
    if has_required_columns:
        cluster_ids = cluster_df['kmeans_cluster']

        # scatter() pairs colours with points by position and ignores the
        # pandas index.  If both tables carry the same unique index values
        # in a different order, reorder the labels to follow the PCA rows;
        # in every other case the labels are used as they are.
        if (
            len(cluster_ids) == len(pca_df)
            and cluster_ids.index.is_unique
            and pca_df.index.is_unique
            and not cluster_ids.index.equals(pca_df.index)
            and cluster_ids.index.isin(pca_df.index).all()
        ):
            cluster_ids = cluster_ids.reindex(pca_df.index)

        fig, ax = plt.subplots(figsize=(10, 8))

        scatter = ax.scatter(pca_df['PC1'], pca_df['PC2'], c=cluster_ids, cmap='viridis', s=100, alpha=0.7)
        ax.set_title('Cluster Analysis', fontsize=14, fontweight='bold')
        ax.set_xlabel('PC1')
        ax.set_ylabel('PC2')
        plt.colorbar(scatter, ax=ax, label='Cluster')
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('../figures/fig4_clusters.png', dpi=300, bbox_inches='tight')
        plt.show()
        print("Saved fig4_clusters.png")
    else:
        print("Cluster or PCA data is missing kmeans_cluster/PC1/PC2 columns; skipping Figure 4")
else:
    print("Cluster data not available")

## 6. Figure 5: Forecast Comparison

In [None]:
# Figure 5: mean RMSE per model as a bar chart, with the per-model standard
# deviation drawn as error bars.
if 'forecast_results' in loaded_data:
    forecast_df = loaded_data['forecast_results']

    try:
        # A missing 'model' or 'RMSE' field raises here and is reported by
        # the handler below.
        grouped = forecast_df.groupby('model')['RMSE'].agg(['mean', 'std'])

        # The standard deviation was previously computed and then dropped.
        # It is NaN for a model with a single row; use 0 there so that bar
        # simply gets no whisker.
        rmse_spread = grouped['std'].fillna(0)

        fig, ax = plt.subplots(figsize=(10, 6))
        grouped['mean'].plot(
            kind='bar',
            yerr=rmse_spread,
            capsize=4,
            ax=ax,
            color='skyblue',
            edgecolor='black',
        )
        ax.set_title('Model Forecast Comparison (RMSE)', fontsize=14, fontweight='bold')
        ax.set_ylabel('RMSE')
        ax.set_xlabel('Model')
        ax.grid(True, alpha=0.3, axis='y')
        plt.xticks(rotation=45)

        plt.tight_layout()
        plt.savefig('../figures/fig5_forecast_comparison.png', dpi=300, bbox_inches='tight')
        plt.show()
        print("Saved fig5_forecast_comparison.png")
    except Exception as e:
        print(f"Error creating forecast comparison: {e}")
else:
    print("Forecast results not available")

## 7. Figure 6: Model Performance Heatmap

In [None]:
# Figure 6: heatmap of the aggregated error metrics in the evaluation summary
# (metrics as rows, the summary's index entries as columns).
if 'evaluation_summary' in loaded_data:
    eval_df = loaded_data['evaluation_summary']

    # Plot whichever of the two metric columns are present.  Previously only
    # RMSE_mean was checked while MAE_mean was indexed unconditionally, and
    # the figure was opened before the check, leaving an empty figure behind.
    metric_cols = [col for col in ('RMSE_mean', 'MAE_mean') if col in eval_df.columns]

    if metric_cols:
        try:
            fig, ax = plt.subplots(figsize=(10, 6))

            sns.heatmap(eval_df[metric_cols].T, annot=True, fmt='.3f', cmap='YlOrRd', ax=ax, cbar_kws={'label': 'Error'})
            ax.set_title('Model Performance Heatmap', fontsize=14, fontweight='bold')

            plt.tight_layout()
            plt.savefig('../figures/fig6_model_heatmap.png', dpi=300, bbox_inches='tight')
            plt.show()
            print("Saved fig6_model_heatmap.png")
        except Exception as e:
            print(f"Error creating heatmap: {e}")
    else:
        print("Evaluation summary has no RMSE_mean/MAE_mean columns; skipping Figure 6")
else:
    print("Evaluation summary not available")

## 8. Summary Statistics Table

In [None]:
# Collect a few headline numbers per dataset; a dataset that failed to load
# simply contributes no entry.
summary_stats = {}

if 'price_matrix' in loaded_data:
    prices = loaded_data['price_matrix']
    first_label, last_label = prices.index[0], prices.index[-1]
    summary_stats['Price Data'] = {
        'Shape': prices.shape,
        'Date Range': f"{first_label} to {last_label}",
    }

if 'feature_matrix' in loaded_data:
    features = loaded_data['feature_matrix']
    summary_stats['Features'] = {
        'Shape': features.shape,
        'Features Count': features.shape[1],
    }

if 'cluster_labels' in loaded_data:
    clusters = loaded_data['cluster_labels']
    if 'kmeans_cluster' in clusters.columns:
        cluster_count = clusters['kmeans_cluster'].nunique()
    else:
        cluster_count = 'N/A'
    summary_stats['Clusters'] = {'Num Clusters': cluster_count}

if 'forecast_results' in loaded_data:
    forecasts = loaded_data['forecast_results']
    if 'RMSE' in forecasts.columns:
        mean_rmse = forecasts['RMSE'].mean()
    else:
        mean_rmse = 'N/A'
    summary_stats['Forecast'] = {
        'Shape': forecasts.shape,
        'Mean RMSE': mean_rmse,
    }

# Echo the collected numbers, one indented line per statistic.
print("\nSummary Statistics:")
for category in summary_stats:
    print(f"\n{category}:")
    for key, value in summary_stats[category].items():
        print(f"  {key}: {value}")

# Save to CSV: one row per category, one column per statistic name.
summary_df = pd.DataFrame(summary_stats).transpose()
summary_df.to_csv("../figures/summary_table.csv")
print("\nSummary table saved to ../figures/summary_table.csv")

## 9. Report Figures Complete

In [None]:
import glob

# Inventory of everything currently in the figures folder (images and the
# summary CSV alike), listed by bare file name in sorted path order.
figure_files = glob.glob("../figures/*")
file_count = len(figure_files)
print(f"Total files in figures directory: {file_count}")
print("\nFiles generated:")
for figure_path in sorted(figure_files):
    _, file_name = os.path.split(figure_path)
    print(f"  {file_name}")