# Universal Solver: Advanced Math Benchmark & Visual Report

This notebook provides a benchmarking and visual analytics report for the advanced math ensemble solvers. It covers solver timing (when recorded), confidence, and agreement metrics, the distribution of final answers, interactive visualizations, and an automated profiling report.

In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from ydata_profiling import ProfileReport
from pathlib import Path

# Read the showcase results table that the remaining cells analyse.
DATA_DIR = Path('showcase_results')
results_path = DATA_DIR / 'math_showcase_results.parquet'
df = pd.read_parquet(results_path)
# Keep the preview as the cell's last expression so the notebook renders it.
df.head()

## 1. Timing & Confidence Benchmarking

In [ ]:
# The 'timing' column is optional: report its absence instead of failing,
# otherwise draw one box per solver.
if 'timing' not in df.columns:
    print('No timing data available.')
else:
    plt.figure(figsize=(10, 5))
    # seaborn draws on the current axes and returns them; title that Axes.
    timing_ax = sns.boxplot(x='solver', y='timing', data=df)
    timing_ax.set_title('Solver Timing Distribution (seconds)')
    plt.show()

In [ ]:
# Distribution of the 'confidence' column, one box per solver.
plt.figure(figsize=(10, 5))
# seaborn draws on the current axes and returns them; title that Axes.
confidence_ax = sns.boxplot(x='solver', y='confidence', data=df)
confidence_ax.set_title('Solver Confidence Distribution')
plt.show()

## 2. Agreement and Consensus Analysis

In [ ]:
# Consensus proxy: for each (problem_type, solver) group, count the distinct
# final answers and take the reciprocal. A rate of 1.0 means every row in the
# group carries the same answer; smaller values mean more distinct answers.
agreement = df.groupby(['problem_type', 'solver'])['final_answer'].nunique().reset_index()
# After nunique() the 'final_answer' column holds the NUMBER of distinct
# answers, not an answer value. nunique() ignores missing values, so a group
# whose answers are all missing has a count of 0; mask those counts to NaN so
# the rate is undefined rather than infinite (1 / 0 evaluates to inf in pandas).
distinct_answers = agreement['final_answer']
agreement['agreement_rate'] = 1 / distinct_answers.where(distinct_answers > 0)
plt.figure(figsize=(10, 5))
sns.barplot(data=agreement, x='solver', y='agreement_rate', hue='problem_type')
plt.title('Solver Consensus Rate by Problem Type')
plt.show()

## 3. Interactive Visualizations (Plotly)

In [ ]:
# Interactive box plot of confidence per solver, coloured by problem type.
fig = px.box(
    data_frame=df,
    x='solver',
    y='confidence',
    color='problem_type',
    title='Confidence by Solver & Problem Type',
)
fig.show()

In [ ]:
# Interactive histogram of final answers, with one bar group per solver.
fig = px.histogram(
    data_frame=df,
    x='final_answer',
    color='solver',
    barmode='group',
    title='Distribution of Final Answers by Solver',
)
fig.show()

## 4. Automated Profiling Report

In [ ]:
# Build the automated profile of the results table and write it as HTML into
# the results directory; the destination is computed once and reused.
report_path = DATA_DIR / 'profiling_report.html'
profile = ProfileReport(df, title='Math Solver Showcase Profiling Report', explorative=True)
profile.to_file(report_path)
print('Profiling report saved to:', report_path)