# Universal Solver: Math Benchmark Showcase (Colab/Cloud Ready)

This notebook demonstrates benchmarking of advanced math solvers on industry-standard datasets.
- Loads benchmark datasets through Hugging Face Datasets, with optional upload examples for Kaggle, Google Cloud Storage, and Azure Blob Storage.
- Ready for Google Colab and cloud platforms.
- Results exportable for further analysis.

In [ ]:
!pip install -q datasets pandas rich openpyxl pyarrow python-dotenv
# If running on Colab, authenticate for Kaggle, GCP, Azure as needed.
import os
from pathlib import Path
from datetime import datetime
import pandas as pd
from benchmark_datasets import list_benchmark_datasets, load_benchmark_dataset, get_problem_and_answer
from showcase_advanced_math import agents, solvers
from adv_resolver_math.math_ensemble_adv_ms_hackaton import MathProblemSolver


## 1. Select Dataset and Sample Size
The cell below prints the available datasets. Set `DATASET` to one of them and `SAMPLE_SIZE` to the number of problems to sample:

In [ ]:
# Show which benchmark datasets can be selected below.
_available = list_benchmark_datasets()
print(_available)

# Benchmark configuration, reused by the later cells.
DATASET = 'gsm8k'  # or 'math', 'mathqa', etc.
SAMPLE_SIZE = 5

# Load the requested sample of the chosen dataset.
ds = load_benchmark_dataset(DATASET, sample_size=SAMPLE_SIZE)
print(f'Sampled {SAMPLE_SIZE} problems from {DATASET}')

## 2. Run Benchmark
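Run every solver on each sampled problem: each solver collects a solution from every agent and then votes on a consensus answer. The results are saved as a Parquet file under `showcase_results/`.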

In [ ]:
# One timestamp per run: it tags every row and names the output file.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
rows = []


def _make_row(solver_name, problem, agent, answer, reference, confidence, explanation, index):
    """Build one result record; key order fixes the DataFrame column order."""
    return {
        'solver': solver_name,
        'problem': problem,
        'dataset': DATASET,
        'agent': agent,
        'answer': answer,
        'reference': reference,
        'confidence': confidence,
        'explanation': explanation,
        'i': index,
        'timestamp': timestamp,
    }


for idx, example in enumerate(ds):
    problem, reference = get_problem_and_answer(example, DATASET)
    for solver_name, solver in solvers:
        try:
            # Ask every agent for a solution, then let the solver vote on them.
            agent_solutions = [solver.get_solution(agent, problem) for agent in agents]
            consensus = solver.vote_on_solutions(agent_solutions)
            # One row per individual agent answer ...
            for solution in agent_solutions:
                rows.append(_make_row(
                    solver_name, problem, solution.agent_name, solution.answer,
                    reference, solution.confidence, solution.explanation, idx,
                ))
            # ... followed by one row for the voted result.
            rows.append(_make_row(
                solver_name, problem, 'CONSENSUS', consensus.answer,
                reference, consensus.confidence, '', idx,
            ))
        except Exception as exc:
            # Best-effort run: record the failure as an ERROR row and keep going.
            rows.append(_make_row(
                solver_name, problem, 'ERROR', str(exc),
                reference, 0.0, '', idx,
            ))

df = pd.DataFrame(rows)

# Persist the results as Parquet inside the results directory.
out_dir = Path('showcase_results')
out_dir.mkdir(exist_ok=True)
out_path = out_dir / f'benchmark_{DATASET}_{timestamp}.parquet'
df.to_parquet(out_path, index=False)
print(f'Benchmark complete. Results saved to {out_path}')

## 3. Export and Visualize Results
You can export to Excel or visualize with pandas, matplotlib, or seaborn.

In [ ]:
# Write the full results table to an Excel workbook in the results directory.
xlsx_path = out_dir / f'benchmark_{DATASET}_{timestamp}.xlsx'
df.to_excel(xlsx_path, index=False)

# Preview the first rows. This must be the last expression in the cell:
# Jupyter/Colab only renders the value of a cell's final expression, so a
# bare `df.head()` placed before another statement is silently discarded.
df.head()

## 4. Cloud/Platform Integrations
- **HuggingFace Datasets**: Used for all math datasets.
- **Kaggle**: Download/upload results with `kaggle` CLI.
- **Google Cloud Storage**: Use `gcsfs` or `gsutil` to upload results.
- **Azure Blob Storage**: Use `azure-storage-blob` to upload results.
- **Google Colab**: All code is Colab-ready. Authenticate with Google Drive for persistent storage.

In [ ]:
# Example: Upload to Google Cloud Storage (requires gcsfs)
# !pip install gcsfs
# import gcsfs
# fs = gcsfs.GCSFileSystem(project='your-gcp-project')
# fs.put(str(out_path), 'gs://your-bucket/path/' + str(out_path.name))

# Example: Upload to Azure Blob Storage
# !pip install azure-storage-blob
# from azure.storage.blob import BlobServiceClient
# blob_service_client = BlobServiceClient.from_connection_string('your-conn-string')
# blob_client = blob_service_client.get_blob_client(container='your-container', blob=str(out_path.name))
# with open(out_path, 'rb') as data:
#     blob_client.upload_blob(data, overwrite=True)

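# Example: Upload to Kaggle Datasets
# (requires the `kaggle` CLI with API credentials and a dataset-metadata.json in showcase_results/)
# !kaggle datasets version -p showcase_results -m 'New benchmark results'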