In [27]:
# Cell 1: sbatch-based SolBenchmarker (CWD & source activate)

import os
import re
import shlex
import subprocess
import time

class SolBenchmarker:
    """
    An agent that submits a CPU-vs-GPU benchmark job to SLURM via sbatch.

    All generated scripts and the SLURM log files are written to a
    'benchmark_files' folder within the current working directory, and the
    job environment is activated with 'source activate'.
    """

    # Matches the "real <seconds>" line printed by `/usr/bin/time -p`.
    # Anchored to a whole line so that ordinary script output which merely
    # contains the word "real" is not mistaken for a timing.
    _REAL_TIME_RE = re.compile(r"^real[ \t]+(\d+(?:\.\d+)?)[ \t]*$", re.MULTILINE)

    # --- ACTION REQUIRED ---
    # The default value of `python_env` must be the RAPIDS environment name
    # you found by running 'mamba info --envs' in the terminal; pass your own
    # name if it differs from the default below.
    def __init__(self, user: str, python_env: str = "rapids25.02"):
        """
        Initializes the benchmarker.

        Args:
            user (str): The ASURITE username, used for monitoring jobs.
            python_env (str): The mamba/conda environment to activate on Sol.

        Raises:
            ValueError: If `user` is empty.
        """
        if not user:
            raise ValueError("ASURITE username is required to monitor SLURM jobs.")
        self.user = user
        self.python_env = python_env

    def _generate_sbatch_script(self, script_dir: str, cpu_script_name: str, gpu_script_name: str) -> str:
        """
        Generates the content of the sbatch script for the benchmark job.

        The script activates `self.python_env` with 'source activate' and runs
        the CPU script and then the GPU script under `/usr/bin/time -p`, each
        wrapped in STARTING/FINISHED marker lines that `_parse_output` uses.

        Args:
            script_dir (str): Directory holding the benchmark scripts; the
                SLURM .out/.err logs are written there too.
            cpu_script_name (str): File name of the CPU script in `script_dir`.
            gpu_script_name (str): File name of the GPU script in `script_dir`.

        Returns:
            str: The full text of the batch script.
        """
        # Everything that bash will expand is quoted so a directory or
        # environment name containing spaces or shell metacharacters cannot
        # split the command or inject extra ones. shlex.quote leaves plain
        # names untouched, so the script is unchanged for ordinary inputs.
        env_name = shlex.quote(self.python_env)
        cpu_script = shlex.quote(f"{script_dir}/{cpu_script_name}")
        gpu_script = shlex.quote(f"{script_dir}/{gpu_script_name}")

        # The "#SBATCH -o/-e" lines are comments to bash and are read by
        # sbatch itself, so they are intentionally left unquoted.
        # NOTE(review): confirm how sbatch treats whitespace in these paths.
        sbatch_script_content = f"""#!/bin/bash
#SBATCH -p general
#SBATCH -q public
#SBATCH -G 1
#SBATCH -A grp_hackathon2025
#SBATCH --reservation=hackathon2025
#SBATCH -t 0-00:10:00
#SBATCH -c 1
#SBATCH -o {script_dir}/slurm-%j.out
#SBATCH -e {script_dir}/slurm-%j.err

# Load the necessary modules and environment
module load mamba/latest
source activate {env_name}

# --- CPU BENCHMARK ---
echo "--- STARTING CPU BENCHMARK ---"
/usr/bin/time -p python3 {cpu_script} 2>&1
echo "--- FINISHED CPU BENCHMARK ---"

echo ""
# --- GPU BENCHMARK ---
echo "--- STARTING GPU BENCHMARK ---"
/usr/bin/time -p python3 {gpu_script} 2>&1
echo "--- FINISHED GPU BENCHMARK ---"
"""
        return sbatch_script_content

    def _last_real_time(self, text):
        """Returns the last `real` timing found in `text` as a float, or None."""
        if text is None:
            return None
        found = self._REAL_TIME_RE.findall(text)
        # `time` reports after the script has finished, so its line is the
        # last one; an earlier look-alike line printed by the script loses.
        return float(found[-1]) if found else None

    def _parse_output(self, output_content: str) -> dict:
        """
        Parses the SLURM output file to find the benchmark times.

        Each time is read from inside its own STARTING/FINISHED section, so a
        benchmark that produced no timing yields None instead of silently
        taking the other benchmark's value. If the log contains neither
        section, the first two `real` lines of the whole log are used for CPU
        and GPU respectively.

        Args:
            output_content (str): Full text of the job's .out log.

        Returns:
            dict: "status" ("success"), "cpu_time_seconds" and
            "gpu_time_seconds" (float or None), "cpu_script_output" and
            "gpu_script_output" (section text, or a "Could not capture ..."
            placeholder), and "raw_log" (the input unchanged).
        """
        sections = {}
        for label in ("CPU", "GPU"):
            section_match = re.search(
                rf"--- STARTING {label} BENCHMARK ---\n(.*?)\n--- FINISHED {label} BENCHMARK ---",
                output_content,
                re.DOTALL,
            )
            sections[label] = section_match.group(1) if section_match else None

        if sections["CPU"] is None and sections["GPU"] is None:
            # No markers at all: fall back to positional matching.
            positional = [float(value) for value in self._REAL_TIME_RE.findall(output_content)]
            cpu_time = positional[0] if len(positional) > 0 else None
            gpu_time = positional[1] if len(positional) > 1 else None
        else:
            cpu_time = self._last_real_time(sections["CPU"])
            gpu_time = self._last_real_time(sections["GPU"])

        cpu_output = sections["CPU"].strip() if sections["CPU"] is not None else "Could not capture CPU script output."
        gpu_output = sections["GPU"].strip() if sections["GPU"] is not None else "Could not capture GPU script output."

        return {
            "status": "success", "cpu_time_seconds": cpu_time, "gpu_time_seconds": gpu_time,
            "cpu_script_output": cpu_output, "gpu_script_output": gpu_output, "raw_log": output_content
        }

    def _submit_job(self, sbatch_path: str) -> str:
        """
        Submits `sbatch_path` with sbatch and returns the job id as a string.

        Raises:
            RuntimeError: If sbatch exits non-zero or prints no job id.
        """
        # Argument list instead of a shell string: the path is passed to
        # sbatch verbatim, whatever characters it contains.
        submission = subprocess.run(["sbatch", sbatch_path], capture_output=True, text=True)
        if submission.returncode != 0:
            raise RuntimeError(f"sbatch submission failed: {submission.stderr}")

        job_id_match = re.search(r"Submitted batch job (\d+)", submission.stdout.strip())
        if not job_id_match:
            raise RuntimeError(f"Could not parse Job ID from sbatch output: {submission.stdout}")
        return job_id_match.group(1)

    def _wait_for_job(self, job_id: str, poll_interval: float, timeout) -> None:
        """
        Polls squeue until `job_id` no longer appears in its output.

        Raises:
            TimeoutError: If `timeout` (seconds) is not None and elapses
                while the job is still listed.
        """
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            queue_process = subprocess.run(
                ["squeue", "-u", self.user, "-j", job_id],
                capture_output=True, text=True,
            )
            if job_id not in queue_process.stdout:
                return
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Timed out after {timeout} seconds waiting for job {job_id}; "
                    "it may still be queued or running."
                )
            time.sleep(poll_interval)

    def run_benchmark(self, cpu_code: str, gpu_code: str, poll_interval: float = 10.0, timeout=None) -> dict:
        """
        The main method to orchestrate the benchmarking process via sbatch.

        Writes both scripts and the batch script into
        '<cwd>/benchmark_files', submits the job, waits for it to leave the
        queue, then parses its .out log.

        Args:
            cpu_code (str): Python source of the CPU benchmark.
            gpu_code (str): Python source of the GPU benchmark.
            poll_interval (float): Seconds to sleep between squeue checks.
            timeout (float | None): Maximum seconds to wait for the job;
                None (the default) waits indefinitely.

        Returns:
            dict: The result of `_parse_output` when the log was read, or
            {"status": "error", "message": ...} when any step inside the
            submit/wait/read sequence failed.

        Raises:
            OSError: If the 'benchmark_files' directory cannot be created
                (this happens before the error-to-dict handling starts).
        """
        benchmark_dir = os.path.join(os.getcwd(), "benchmark_files")
        os.makedirs(benchmark_dir, exist_ok=True)

        cpu_script_name = "cpu_benchmark.py"
        gpu_script_name = "gpu_benchmark.py"
        sbatch_path = os.path.join(benchmark_dir, "benchmark_job.sh")

        try:
            sbatch_script = self._generate_sbatch_script(benchmark_dir, cpu_script_name, gpu_script_name)
            files_to_write = (
                (os.path.join(benchmark_dir, cpu_script_name), cpu_code),
                (os.path.join(benchmark_dir, gpu_script_name), gpu_code),
                (sbatch_path, sbatch_script),
            )
            for path, content in files_to_write:
                # Explicit UTF-8 so non-ASCII source text does not depend on
                # the notebook server's locale.
                with open(path, "w", encoding="utf-8") as f:
                    f.write(content)

            job_id = self._submit_job(sbatch_path)
            print(f"Successfully submitted job with ID: {job_id}")

            print("Waiting for job to complete...")
            self._wait_for_job(job_id, poll_interval, timeout)
            print(f"Job {job_id} completed.")

            output_file_path = os.path.join(benchmark_dir, f"slurm-{job_id}.out")
            if not os.path.exists(output_file_path):
                err_file_path = os.path.join(benchmark_dir, f"slurm-{job_id}.err")
                if os.path.exists(err_file_path):
                    with open(err_file_path, "r", encoding="utf-8", errors="replace") as f:
                        error_content = f.read()
                    return {"status": "error", "message": f"Job failed. See error log: {error_content}"}
                return {"status": "error", "message": f"Output file {output_file_path} not found."}

            with open(output_file_path, "r", encoding="utf-8", errors="replace") as f:
                output_content = f.read()

            return self._parse_output(output_content)
        except Exception as e:
            # Deliberate catch-all: callers receive failures as a status
            # dict rather than as an exception.
            return {"status": "error", "message": str(e)}

In [28]:
# Cell 2: Test Driver (with Larger Problem Size)
#
# Submits a NumPy and a CuPy matrix multiplication through SolBenchmarker
# (defined in Cell 1) and prints the parsed timings, the captured script
# output, and the raw SLURM log.

# IMPORTANT: Set this to your own ASURITE username. The placeholder value
# 'YOUR_ASURITE_USERNAME' is rejected by the check below.
ASURITE_USER = "mrajanva"

if ASURITE_USER == "YOUR_ASURITE_USERNAME":
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print("!!! PLEASE REPLACE 'YOUR_ASURITE_USERNAME' WITH YOUR ASURITE ID !!!")
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
else:
    # This section simulates Agent 3 generating the code to be benchmarked.
    # We've increased the matrix size to demonstrate a real GPU speedup.
    cpu_code_to_benchmark = """
import numpy as np
import time

# Create two large matrices
# The problem size is now much larger (12000*12000)
size = 12000
matrix_a = np.random.rand(size, size).astype(np.float32)
matrix_b = np.random.rand(size, size).astype(np.float32)

# Perform matrix multiplication
result = np.dot(matrix_a, matrix_b)

print(f"CPU (NumPy) execution finished. Shape: {result.shape}")
"""

    gpu_code_to_benchmark = """
import cupy as cp
import time

# Create two large matrices on the GPU
# The problem size is now much larger (12000*12000)
size = 12000
matrix_a = cp.random.rand(size, size).astype(cp.float32)
matrix_b = cp.random.rand(size, size).astype(cp.float32)
cp.cuda.runtime.deviceSynchronize() # Wait for matrices to be created

# Perform matrix multiplication
result = cp.dot(matrix_a, matrix_b)
cp.cuda.runtime.deviceSynchronize() # Wait for the computation to complete

print(f"GPU (CuPy) execution finished. Shape: {result.shape}")
"""

    # Instantiate and run the benchmarker
    # Ensure the SolBenchmarker class in Cell 1 is using the correct RAPIDS environment
    benchmarker = SolBenchmarker(user=ASURITE_USER)
    benchmark_results = benchmarker.run_benchmark(
        cpu_code=cpu_code_to_benchmark,
        gpu_code=gpu_code_to_benchmark
    )

    # Format the results. A time of None means the log held no timing for
    # that benchmark, which is reported instead of being formatted.
    print("\n--- Benchmark Results ---")
    if benchmark_results.get("status") == "success":
        cpu_time = benchmark_results.get('cpu_time_seconds')
        gpu_time = benchmark_results.get('gpu_time_seconds')

        if cpu_time is not None:
            print(f"CPU Time: {cpu_time:.4f} seconds")
        else:
            print("CPU Time: Could not be parsed.")

        if gpu_time is not None:
            print(f"GPU Time: {gpu_time:.4f} seconds")
        else:
            print("GPU Time: Could not be parsed.")

        # Both times must be present and non-zero for the ratio to exist.
        if cpu_time and gpu_time and gpu_time > 0:
            speedup = cpu_time / gpu_time
            if speedup >= 1:
                print(f"\nSpeedup: {speedup:.2f}x faster on GPU!")
            else:
                # Do not call a ratio below 1 a speedup.
                print(f"\nSlowdown: the GPU run took {1 / speedup:.2f}x as long as the CPU run.")

        print("\nCPU Script Output:")
        print(benchmark_results.get('cpu_script_output'))
        print("\nGPU Script Output:")
        print(benchmark_results.get('gpu_script_output'))

        print("\n--- Raw Log From SLURM ---")
        print(benchmark_results.get('raw_log', 'No raw log available.'))
    else:
        # This will catch errors from the agent itself
        print(f"An error occurred during the benchmark run: {benchmark_results.get('message')}")

        # An error result may still carry the job log; show it so the
        # failure can be diagnosed without opening the file by hand.
        raw_log = benchmark_results.get('raw_log')
        if raw_log:
            print("\n--- Raw Log From SLURM ---")
            print(raw_log)

Successfully submitted job with ID: 28522144
Waiting for job to complete...
Job 28522144 completed.

--- Benchmark Results ---
CPU Time: 505.8900 seconds
GPU Time: 5.4100 seconds

Speedup: 93.51x faster on GPU!

CPU Script Output:
CPU (NumPy) execution finished. Shape: (30000, 30000)
real 505.89
user 494.96
sys 8.62

GPU Script Output:
GPU (CuPy) execution finished. Shape: (30000, 30000)
real 5.41
user 3.54
sys 0.76

--- Raw Log From SLURM ---
--- STARTING CPU BENCHMARK ---
CPU (NumPy) execution finished. Shape: (30000, 30000)
real 505.89
user 494.96
sys 8.62
--- FINISHED CPU BENCHMARK ---

--- STARTING GPU BENCHMARK ---
GPU (CuPy) execution finished. Shape: (30000, 30000)
real 5.41
user 3.54
sys 0.76
--- FINISHED GPU BENCHMARK ---

