In [17]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pandas as pd
import numpy as np
from matplotlib.table import Table
import os

# Set style
# Style names differ between matplotlib releases, so try the newer name first,
# then the older one, and fall back to the built-in default style.
# Only OSError (raised by plt.style.use for an unknown style) is treated as
# "style not available"; anything else, e.g. KeyboardInterrupt, propagates.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    try:
        plt.style.use(_style_name)
    except OSError:
        continue
    break
else:
    plt.style.use('default')

# Resolution for on-screen figures and saved files, and the base font size.
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10


In [18]:
# Data for all datasets
# Every row below lists its values in exactly this metric order.
_METRIC_NAMES = ('MAE', 'RMSE', 'R2', 'MAPE', 'Accuracy', 'Fairness', 'Readiness')


def _scores(*values):
    """Label positional metric values with the names in _METRIC_NAMES."""
    return dict(zip(_METRIC_NAMES, values))


# datasets[dataset name][model name][metric name] -> value
datasets = {
    'Student Performance': {
        #                             MAE     RMSE    R2      MAPE  Acc.   Fairness Readiness
        'XGBoost':            _scores(0.7274, 1.9263, 0.7375, 1.04, 98.92, 0.4186, 18.69),
        'RandomForest':       _scores(1.1197, 2.1954, 0.6590, 1.63, 98.34, 0.2309, 17.30),
        'LightGBM':           _scores(0.7314, 1.9028, 0.7439, 1.04, 98.91, 0.4249, 17.72),
        'LogisticRegression': _scores(1.1172, 2.2838, 0.6310, 1.62, 98.34, 0.4619, 17.16),
    },
    'University Ranking': {
        'XGBoost':            _scores(368.81, 486.46, 0.2165, 35.09, 95.9, 0.0004, 18.79),
        'RandomForest':       _scores(368.04, 482.22, 0.2301, 34.76, 95.9, 0.0084, 18.80),
        'LightGBM':           _scores(364.51, 476.47, 0.2484, 35.11, 95.9, 0.0044, 18.79),
        'LogisticRegression': _scores(939.08, 1088.19, -2.9204, 99.67, 10.7, 0.0014, 12.54),
    },
    'Enrollment': {
        'XGBoost':            _scores(235251, 369604, 0.9970, 4.08, 99.7, 398335, 14.997),
        'RandomForest':       _scores(226740, 420093, 0.9961, 3.04, 99.6, 106705, 14.996),
        'LightGBM':           _scores(1975339, 2389779, 0.8734, 35.67, 87.3, 1584734, 14.873),
        'LogisticRegression': _scores(6036301, 6349858, 0.1063, 245.16, 10.6, 3093992, 14.106),
    },
    'Dropout': {
        # MAPE entries are whole numbers of the order of 1e9, stored as ints.
        'XGBoost':            _scores(0.306, 0.444, 0.4559, 1355932000, 76.3, 0.0271, 18.81),
        'RandomForest':       _scores(0.289, 0.414, 0.4932, 1378531000, 77.3, 0.0065, 18.86),
        'LightGBM':           _scores(0.303, 0.441, 0.4600, 1401130000, 76.6, 0.0068, 18.83),
        'LogisticRegression': _scores(0.314, 0.454, 0.4434, 1299435000, 75.6, 0.0087, 17.78),
    },
    'Budget Prediction': {
        # Averages across 9 labels
        'XGBoost':            _scores(0.0815, 1.1606, 0.9318, 2.3151, 98.52, 0.0009, 19.90),
        'RandomForest':       _scores(1.5031, 22.0941, 0.2156, 38.8370, 77.40, 0.0008, 18.76),
        'LightGBM':           _scores(2.2800, 50.1843, 0.6690, 40.8317, 76.00, 0.0015, 19.20),
        'LogisticRegression': _scores(0.1042, 1.3952, 0.9140, 2.8706, 97.86, 0.0011, 18.89),
    },
}

# Model names in the row order used by every table.
models = ['XGBoost', 'RandomForest', 'LightGBM', 'LogisticRegression']



In [19]:
def format_number(value, metric_type):
    """Turn a metric value into the text shown in a table cell.

    Args:
        value: Numeric metric value, or None when no value is available.
        metric_type: One of 'MAE', 'RMSE', 'R2', 'MAPE', 'Accuracy',
            'Fairness' or 'Readiness'.

    Returns:
        'N/A' for a None value. Otherwise a string formatted per metric:
        MAE / RMSE / Fairness values of 1000 or more are abbreviated with a
        'K' (below one million) or 'M' suffix; smaller MAE / RMSE values use
        four decimals below 1 and two decimals otherwise; smaller Fairness
        values and R2 use four decimals; MAPE and Accuracy use two decimals
        plus a '%' sign; Readiness uses two decimals. Any other metric type
        falls back to ``str(value)``.
    """
    if value is None:
        return 'N/A'

    def abbreviate(number):
        # Thousands get one decimal and 'K', millions two decimals and 'M'.
        if number < 1000000:
            return f'{number/1000:.1f}K'
        return f'{number/1000000:.2f}M'

    if metric_type in ('MAE', 'RMSE'):
        if value >= 1000:
            return abbreviate(value)
        if value < 1:
            return f'{value:.4f}'
        return f'{value:.2f}'

    if metric_type == 'Fairness':
        if value >= 1000:
            return abbreviate(value)
        return f'{value:.4f}'

    if metric_type == 'R2':
        return f'{value:.4f}'

    if metric_type in ('MAPE', 'Accuracy'):
        return f'{value:.2f}%'

    if metric_type == 'Readiness':
        return f'{value:.2f}'

    return str(value)


In [20]:
def create_performance_table(dataset_name, data, output_path):
    """Render the performance metrics of one dataset as a table image.

    Draws a matplotlib table with one row per entry of the module-level
    ``models`` list and the columns Model, MAE, RMSE, R², MAPE and Accuracy,
    shades the best value in each metric column green, and saves the figure
    to ``output_path``.

    Args:
        dataset_name: Name shown in the figure title. When it equals
            'Dropout' the MAPE column is left without a highlight.
        data: Mapping ``model name -> metric name -> value`` that holds the
            keys 'MAE', 'RMSE', 'R2', 'MAPE' and 'Accuracy' for every model
            in ``models``. A value of None is shown as 'N/A' (by
            ``format_number``) and is ignored when picking the best value.
        output_path: Destination file path handed to ``Figure.savefig``.

    Returns:
        None. On success a 'Created: ...' line is printed.

    Raises:
        KeyError: If ``data`` lacks a model or metric listed above.
    """
    column_labels = ['Model', 'MAE', 'RMSE', 'R²', 'MAPE (%)', 'Accuracy (%)']
    # Metric key behind each table column that follows the 'Model' column.
    metrics = ['MAE', 'RMSE', 'R2', 'MAPE', 'Accuracy']
    # Error metrics are best when smallest; R2 and Accuracy when largest.
    lower_is_better = ('MAE', 'RMSE', 'MAPE')

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        ax.axis('tight')
        ax.axis('off')

        # One row per model: its name followed by the formatted metrics.
        table_data = [
            [model] + [format_number(data[model][metric], metric) for metric in metrics]
            for model in models
        ]

        # bbox=[0, 0, 1, 1] draws the table into the full axes area.
        table = ax.table(cellText=table_data,
                         colLabels=column_labels,
                         cellLoc='center',
                         loc='center',
                         bbox=[0, 0, 1, 1])

        # Style table
        table.auto_set_font_size(False)
        table.set_fontsize(11)
        table.scale(1, 2.5)

        # Color header (table row 0 holds the column labels)
        for col in range(len(column_labels)):
            table[(0, col)].set_facecolor('#4472C4')
            table[(0, col)].set_text_props(weight='bold', color='white')

        # Highlight the best value of each metric column. Data rows start at
        # table row 1 and metric columns at table column 1.
        for col, metric in enumerate(metrics, start=1):
            if metric == 'MAPE' and dataset_name == 'Dropout':
                # Skip MAPE for Dropout
                # NOTE(review): presumably because MAPE is not meaningful for
                # that dataset - confirm.
                continue

            candidates = [
                (row, data[model][metric])
                for row, model in enumerate(models, start=1)
                if data[model][metric] is not None
            ]
            if not candidates:
                continue

            # min/max return the first of several equal values, so ties go
            # to the model listed first.
            pick = min if metric in lower_is_better else max
            best_row, _ = pick(candidates, key=lambda pair: pair[1])
            table[(best_row, col)].set_facecolor('#90EE90')

        # Style model names (first column of every data row)
        for row in range(1, len(models) + 1):
            table[(row, 0)].set_facecolor('#E7E6E6')
            table[(row, 0)].set_text_props(weight='bold')

        ax.set_title(f'Performance Metrics Summary - {dataset_name}',
                     fontsize=16, fontweight='bold', pad=20)
        fig.savefig(output_path, bbox_inches='tight', facecolor='white', edgecolor='none')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    print(f'Created: {output_path}')


In [21]:
def create_fairness_table(dataset_name, data, output_path):
    """Render the fairness disparity of every model as a table image.

    Draws a two-column matplotlib table (Model, Disparity) with one row per
    entry of the module-level ``models`` list, shades the lowest disparity
    green, and saves the figure to ``output_path``.

    Args:
        dataset_name: Name shown in the figure title.
        data: Mapping ``model name -> metric name -> value`` that holds a
            'Fairness' entry for every model in ``models``. A value of None
            is shown as 'N/A' (by ``format_number``) and is ignored when
            picking the best value.
        output_path: Destination file path handed to ``Figure.savefig``.

    Returns:
        None. On success a 'Created: ...' line is printed.

    Raises:
        KeyError: If ``data`` lacks a model or its 'Fairness' entry.
    """
    column_labels = ['Model', 'Disparity (Lower is Better)']

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.axis('tight')
        ax.axis('off')

        # One row per model: its name and the formatted disparity.
        table_data = [
            [model, format_number(data[model]['Fairness'], 'Fairness')]
            for model in models
        ]

        # bbox=[0, 0, 1, 1] draws the table into the full axes area.
        table = ax.table(cellText=table_data,
                         colLabels=column_labels,
                         cellLoc='center',
                         loc='center',
                         bbox=[0, 0, 1, 1])

        # Style table
        table.auto_set_font_size(False)
        table.set_fontsize(12)
        table.scale(1, 3)

        # Color header (table row 0 holds the column labels)
        for col in range(len(column_labels)):
            table[(0, col)].set_facecolor('#4472C4')
            table[(0, col)].set_text_props(weight='bold', color='white')

        # Highlight best (lowest disparity). Missing values are left out so
        # a None entry cannot break the comparison; data rows start at table
        # row 1, and ties go to the model listed first.
        scored = [
            (row, data[model]['Fairness'])
            for row, model in enumerate(models, start=1)
            if data[model]['Fairness'] is not None
        ]
        if scored:
            best_row, _ = min(scored, key=lambda pair: pair[1])
            table[(best_row, 1)].set_facecolor('#90EE90')

        # Style model names (first column of every data row)
        for row in range(1, len(models) + 1):
            table[(row, 0)].set_facecolor('#E7E6E6')
            table[(row, 0)].set_text_props(weight='bold')

        ax.set_title(f'Fairness Summary - {dataset_name}',
                     fontsize=16, fontweight='bold', pad=20)
        fig.savefig(output_path, bbox_inches='tight', facecolor='white', edgecolor='none')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    print(f'Created: {output_path}')


In [22]:
def create_readiness_table(dataset_name, data, output_path):
    """Render the readiness score of every model as a table image.

    Draws a two-column matplotlib table (Model, Total Readiness Score) with
    one row per entry of the module-level ``models`` list, shades the highest
    score green, and saves the figure to ``output_path``.

    Args:
        dataset_name: Name shown in the figure title.
        data: Mapping ``model name -> metric name -> value`` that holds a
            'Readiness' entry for every model in ``models``. A value of None
            is shown as 'N/A' (by ``format_number``) and is ignored when
            picking the best value.
        output_path: Destination file path handed to ``Figure.savefig``.

    Returns:
        None. On success a 'Created: ...' line is printed.

    Raises:
        KeyError: If ``data`` lacks a model or its 'Readiness' entry.
    """
    column_labels = ['Model', 'Total Readiness Score']

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.axis('tight')
        ax.axis('off')

        # One row per model: its name and the formatted readiness score.
        table_data = [
            [model, format_number(data[model]['Readiness'], 'Readiness')]
            for model in models
        ]

        # bbox=[0, 0, 1, 1] draws the table into the full axes area.
        table = ax.table(cellText=table_data,
                         colLabels=column_labels,
                         cellLoc='center',
                         loc='center',
                         bbox=[0, 0, 1, 1])

        # Style table
        table.auto_set_font_size(False)
        table.set_fontsize(12)
        table.scale(1, 3)

        # Color header (table row 0 holds the column labels)
        for col in range(len(column_labels)):
            table[(0, col)].set_facecolor('#4472C4')
            table[(0, col)].set_text_props(weight='bold', color='white')

        # Highlight best (highest readiness). Missing values are left out so
        # a None entry cannot break the comparison; data rows start at table
        # row 1, and ties go to the model listed first.
        scored = [
            (row, data[model]['Readiness'])
            for row, model in enumerate(models, start=1)
            if data[model]['Readiness'] is not None
        ]
        if scored:
            best_row, _ = max(scored, key=lambda pair: pair[1])
            table[(best_row, 1)].set_facecolor('#90EE90')

        # Style model names (first column of every data row)
        for row in range(1, len(models) + 1):
            table[(row, 0)].set_facecolor('#E7E6E6')
            table[(row, 0)].set_text_props(weight='bold')

        ax.set_title(f'Readiness Summary - {dataset_name}',
                     fontsize=16, fontweight='bold', pad=20)
        fig.savefig(output_path, bbox_inches='tight', facecolor='white', edgecolor='none')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    print(f'Created: {output_path}')


In [23]:
# Generate all visualization images
output_dir = 'metrics_visualizations'
os.makedirs(output_dir, exist_ok=True)

# One (filename suffix, renderer) pair per image written for a dataset,
# listed in the order the images are produced.
image_builders = (
    ('_performance_metrics.png', create_performance_table),
    ('_fairness_summary.png', create_fairness_table),
    ('_readiness_summary.png', create_readiness_table),
)

# Generate images for each dataset
for dataset_name, data in datasets.items():
    # Filename stem: dataset name with underscores for spaces, lower-cased.
    filename_base = dataset_name.replace(' ', '_').lower()

    for suffix, build_image in image_builders:
        build_image(dataset_name, data, f'{output_dir}/{filename_base}{suffix}')

print(f'\n✅ All visualizations generated in \'{output_dir}\' directory!')
print(f'   Total: {len(datasets) * 3} images created')


Created: metrics_visualizations/student_performance_performance_metrics.png
Created: metrics_visualizations/student_performance_fairness_summary.png
Created: metrics_visualizations/student_performance_readiness_summary.png
Created: metrics_visualizations/university_ranking_performance_metrics.png
Created: metrics_visualizations/university_ranking_fairness_summary.png
Created: metrics_visualizations/university_ranking_readiness_summary.png
Created: metrics_visualizations/enrollment_performance_metrics.png
Created: metrics_visualizations/enrollment_fairness_summary.png
Created: metrics_visualizations/enrollment_readiness_summary.png
Created: metrics_visualizations/dropout_performance_metrics.png
Created: metrics_visualizations/dropout_fairness_summary.png
Created: metrics_visualizations/dropout_readiness_summary.png
Created: metrics_visualizations/budget_prediction_performance_metrics.png
Created: metrics_visualizations/budget_prediction_fairness_summary.png
Created: metrics_visualizatio