In [None]:
import os
import subprocess
from pathlib import Path
import json
from pydantic import BaseModel
import nest_asyncio
import shutil
import logfire
import csv
import re

from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.models.groq import GroqModel

from great_tables import GT, style, loc
import pandas as pd

from dotenv import load_dotenv

# One-time runtime setup for the notebook; the three calls run in this order.
# NOTE(review): load_dotenv() runs *after* logfire.configure(), so any Logfire
# settings that live only in .env are not yet in the environment when Logfire
# is configured -- confirm this ordering is intentional.
logfire.configure()
# Presumably required so the synchronous agent.run_sync(...) calls further down
# can run inside the notebook's already-running event loop -- TODO confirm.
nest_asyncio.apply()
# Loads variables from a .env file; the provider API keys are later read with
# os.getenv(...). override=True presumably lets .env values replace variables
# that are already set in the process environment -- verify against python-dotenv.
load_dotenv(override=True)

True

Logfire project URL: https://logfire.pydantic.dev/prayash/hpc4llm


In [16]:
# Root of the CUDA toolkit used for compiling the generated programs.
CUDA_HOME = "/usr/local/cuda-12.4"


def prepend_env_path(var_name: str, directory: str) -> None:
    """Put ``directory`` at the front of a path-list environment variable.

    The operation is idempotent: re-running the cell does not keep growing the
    variable, because any existing occurrence of ``directory`` is removed
    before it is prepended. Empty entries are dropped as well, so an unset or
    empty variable no longer yields a trailing separator (an empty entry in
    PATH / LD_LIBRARY_PATH is interpreted as the current directory).

    Args:
        var_name: Name of the environment variable (e.g. ``"PATH"``).
        directory: Directory that must come first in the search order.
    """
    current = os.environ.get(var_name, "")
    remaining = [
        entry
        for entry in current.split(os.pathsep)
        if entry and entry != directory
    ]
    os.environ[var_name] = os.pathsep.join([directory, *remaining])


prepend_env_path("PATH", f"{CUDA_HOME}/bin")
prepend_env_path("LD_LIBRARY_PATH", f"{CUDA_HOME}/lib64")

# Verify that the compiler is actually reachable before any benchmark is run.
nvcc_path = shutil.which("nvcc")
if nvcc_path is None:
    print("[ERROR] nvcc not found in PATH. Please ensure that nvcc is installed and its directory is added to the PATH environment variable.")
else:
    print("nvcc found at:", nvcc_path)

nvcc found at: /usr/local/cuda-12.4/bin/nvcc


In [None]:
def extract_cuda_code(text: str) -> str:
    """
    Extract the source code from the first fenced code block of an LLM reply.

    Selection order:
      1. The first block opened with ```cuda (case-insensitive).
      2. Otherwise the first triple-backtick block of any kind. A language tag
         on the opening fence line (e.g. ```cpp, ```c++, ```c) is NOT part of
         the returned code.
      3. Otherwise the whole text.

    Any <think>...</think> section is removed before searching, so code fences
    inside a reasoning section are never picked up.

    Args:
        text: Raw model response.

    Returns:
        The extracted code with leading/trailing whitespace stripped.
    """
    text_no_think = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)

    # Preferred form: a fence explicitly tagged as CUDA.
    match = re.search(r'```cuda(.*?)```', text_no_think, flags=re.DOTALL | re.IGNORECASE)
    if not match:
        # Generic fence. The optional non-capturing group consumes a language
        # tag that is alone on the opening fence line; previously the tag was
        # captured as the first line of the code and broke compilation.
        # Inline fences such as ```int x;``` still work because the group only
        # matches when the tag is immediately followed by a line break.
        pattern_generic = r'```(?:[\w+.-]*[ \t]*\r?\n)?(.*?)```'
        match = re.search(pattern_generic, text_no_think, flags=re.DOTALL)

    if match:
        return match.group(1).strip()
    # No fence at all: assume the whole reply is code.
    return text_no_think.strip()

In [None]:
# The prompt sent to every model is assembled from four sections:
# task + requirements, an introduction to the skeleton, the skeleton itself
# (inside a plain code fence), and the required output format.

# Task statement followed by the bullet list of hard requirements.
_task_and_requirements = (
    "You are an expert in high-performance CUDA code generation. "
    "Generate a complete and valid CUDA program "
    "that performs a sum reduction on an array of 1024 integers:\n"
    + "".join(
        f"  - {requirement}\n"
        for requirement in (
            "Measure kernel execution time using CUDA events",
            "Print the final sum",
            "Print the kernel execution time",
            "Be self-contained with a main() function",
            "Compile with nvcc (version 12.4)",
        )
    )
    + "\n"
)

# Tells the model that the skeleton below is optional guidance.
_skeleton_intro = (
    "Below is a minimal main function skeleton that you may expand upon. "
    "You may change it or replace it entirely "
    "with your own structure, as long as the requirements above are met:\n\n"
)

# Starter program, one list item per line of the fenced block.
_skeleton_lines = [
    "```",
    "#include <cuda_runtime.h>",
    "#include <iostream>",
    "",
    "// You may rename or modify this kernel as you see fit.",
    "__global__ void sumReductionKernel(int *d_input, int *d_output) {",
    "    // Fill in your sum reduction logic here.",
    "}",
    "",
    "int main() {",
    "    // 1. Allocate and initialize an array of 1024 integers on the host.",
    "    // 2. Allocate device memory.",
    "    // 3. Copy data from host to device.",
    "    // 4. Create CUDA events to measure execution time.",
    "    // 5. Launch the sum reduction kernel.",
    "    // 6. Record the kernel execution time.",
    "    // 7. Copy the final sum back to the host.",
    "    // 8. Print the sum and the kernel execution time.",
    "    // 9. Clean up device memory.",
    "",
    "    return 0;",
    "}",
    "```",
]
_skeleton = "\n".join(_skeleton_lines) + "\n\n"

# Required answer format: exactly one ```cuda block, nothing else.
_output_format = (
    "Produce only a single code block with the final, self-contained program that meets the requirements. "
    "Output must follow this exact format, with no additional commentary or disclaimers:\n\n"
    "```cuda\n"
    "// Your complete high-performance CUDA code here\n"
    "```\n\n"
)

prompt = _task_and_requirements + _skeleton_intro + _skeleton + _output_format


In [19]:
# Directory that receives the generated .cu sources and compiled binaries.
generated_dir = Path("data") / "1_benchmark"
generated_dir.mkdir(parents=True, exist_ok=True)

# One entry per provider: (model wrapper class, env var holding the API key,
# model names to benchmark). Dict order below follows this table, so the
# benchmark order is: OpenAI, Anthropic, Gemini, open-source models.
provider_specs = [
    # OpenAI models
    (OpenAIModel, "OPENAI_API_KEY", [
        "gpt-4o-mini",
        "o1-mini",
    ]),
    # Anthropic models
    (AnthropicModel, "ANTHROPIC_API_KEY", [
        "claude-3-5-sonnet-latest",
        "claude-3-5-haiku-latest",
    ]),
    # Gemini models
    (GeminiModel, "GEMINI_API_KEY", [
        "gemini-1.5-flash",
        # "gemini-2.0-flash",  (disabled)
    ]),
    # Opensource models
    (GroqModel, "GROQ_API_KEY", [
        "llama-3.3-70b-versatile",
        "qwen-2.5-32b",
        # "deepseek-r1-distill-qwen-32b",  (disabled)
    ]),
]

# Map each model name to an Agent wrapping the matching provider model.
agents = {
    name: Agent(model=model_cls(name, api_key=os.getenv(key_var)))
    for model_cls, key_var, names in provider_specs
    for name in names
}


print("Agents configured:")
for agent_name in agents:
    print(f" - {agent_name}")

# Accumulator for the per-model benchmark records.
results = []


Agents configured:
 - gpt-4o-mini
 - o1-mini
 - claude-3-5-sonnet-latest
 - claude-3-5-haiku-latest
 - gemini-1.5-flash
 - llama-3.3-70b-versatile
 - qwen-2.5-32b


In [20]:
# Benchmark every configured agent: generate -> save -> compile -> run.
# Each model contributes exactly one record to `results` with the keys
# "Model", "Compilation" ("Success"/"Failure") and "Execution Output".
COMPILE_TIMEOUT_S = 30  # upper bound for one nvcc invocation
RUN_TIMEOUT_S = 30      # upper bound for one run of a generated binary

results = []

for model_name, agent in agents.items():
    print(f"\n--- Running baseline sum reduction test for {model_name} ---")
    code_file = generated_dir / f"baseline_{model_name}.cu"
    binary_file = generated_dir / f"baseline_{model_name}.out"

    # 1. Generate the program.
    try:
        response = agent.run_sync(prompt)
        # Prefer `.output` when the result object has it, fall back to `.data`,
        # and finally to the object itself (presumably newer pydantic-ai
        # releases expose the text as `.output` -- TODO confirm for the
        # installed version).
        code_text = getattr(response, "output", None)
        if code_text is None:
            code_text = response.data if hasattr(response, "data") else response
        cleaned_code = extract_cuda_code(code_text)
    except Exception as e:
        # Best effort: one failing provider must not abort the whole benchmark.
        print(f"[ERROR] Code generation failed for {model_name}: {e}")
        cleaned_code = ""

    # 2. Save the generated CUDA code (also when empty, so the file on disk
    #    always reflects this run rather than a previous one).
    code_file.write_text(cleaned_code, encoding="utf-8")

    # Pessimistic defaults; overwritten as each stage succeeds.
    compile_status = "Failure"
    exec_output = "Error"

    if not cleaned_code:
        # Nothing to compile: report the real cause instead of letting nvcc
        # fail on an empty file and labelling it a compilation failure.
        exec_output = "Code Generation Failed"
    else:
        # 3. Compile using nvcc.
        compile_cmd = ["nvcc", "-O3", str(code_file), "-o", str(binary_file)]
        try:
            compile_result = subprocess.run(
                compile_cmd, capture_output=True, text=True, timeout=COMPILE_TIMEOUT_S
            )
        except Exception as e:
            print(f"[ERROR] Exception during compile for {model_name}: {e}")
            compile_result = None

        if compile_result is not None and compile_result.returncode != 0:
            print(f"[ERROR] Compilation failed for {model_name}:\n{compile_result.stderr}")
            exec_output = "Compilation Failed"
        elif compile_result is not None:
            compile_status = "Success"

            # 4. Run the compiled binary. Failures here are handled separately
            #    so they can no longer overwrite the successful compile status.
            try:
                run_result = subprocess.run(
                    [str(binary_file)], capture_output=True, text=True, timeout=RUN_TIMEOUT_S
                )
            except subprocess.TimeoutExpired:
                print(f"[ERROR] Execution timed out for {model_name} after {RUN_TIMEOUT_S}s")
                exec_output = "Execution Timed Out"
            except Exception as e:
                print(f"[ERROR] Exception during run for {model_name}: {e}")
                exec_output = "Error"
            else:
                exec_output = run_result.stdout.strip() if run_result.stdout else run_result.stderr.strip()
                if run_result.returncode != 0:
                    # Make crashes visible instead of reporting partial output
                    # as if the run had succeeded.
                    print(f"[ERROR] Binary for {model_name} exited with code {run_result.returncode}")
                    exec_output = f"Runtime Error (exit code {run_result.returncode}): {exec_output}"

    # 5. Record results.
    results.append({
        "Model": model_name,
        "Compilation": compile_status,
        "Execution Output": exec_output
    })



--- Running baseline sum reduction test for gpt-4o-mini ---
19:50:41.167 agent run prompt=You are an expert in high-performance CUDA code generation. Ge...```cuda
// Your complete high-performance CUDA code here
```


19:50:41.186   preparing model request params run_step=1
19:50:41.187   model request
19:50:52.310   handle model response

--- Running baseline sum reduction test for o1-mini ---
19:50:53.145 agent run prompt=You are an expert in high-performance CUDA code generation. Ge...```cuda
// Your complete high-performance CUDA code here
```


19:50:53.146   preparing model request params run_step=1
19:50:53.146   model request
19:51:07.784   handle model response

--- Running baseline sum reduction test for claude-3-5-sonnet-latest ---
19:51:08.611 agent run prompt=You are an expert in high-performance CUDA code generation. Ge...```cuda
// Your complete high-performance CUDA code here
```


19:51:08.612   preparing model request params run_step=1
19:51:08.612   model request
19

In [21]:
# Persist the benchmark records collected in `results` as a CSV file.
results_csv = Path("data/test_data/baseline_results.csv")
results_csv.parent.mkdir(parents=True, exist_ok=True)

# newline="" lets the csv module control line endings itself.
# The encoding is pinned to UTF-8 so the file does not depend on the locale
# default and matches the default encoding pd.read_csv uses when the file is
# read back; program output captured from the generated binaries may contain
# non-ASCII characters.
with open(results_csv, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["Model", "Compilation", "Execution Output"])
    writer.writeheader()
    writer.writerows(results)

print(f"\nResults saved to {results_csv}")



Results saved to data/test_data/baseline_results.csv


In [22]:
# Read the baseline results back from the CSV file at `results_csv` and render
# them as a great_tables table (one row per model; columns come from the CSV header).
df = pd.read_csv(results_csv)
gt_table = GT(df)
gt_table.show()


Model,Compilation,Execution Output
gpt-4o-mini,Success,Final Sum: 1024 Kernel Execution Time: 14.9934 ms
o1-mini,Success,Sum: 1024 Kernel execution time: 12.6894 ms
claude-3-5-sonnet-latest,Success,Sum: 1024 Kernel execution time: 13.5342 ms
claude-3-5-haiku-latest,Success,Final Sum: 1024 Kernel Execution Time: 13.3683 ms
gemini-1.5-flash,Failure,Compilation Failed
llama-3.3-70b-versatile,Success,Final sum: 1024 Kernel execution time: 19.4846 milliseconds
qwen-2.5-32b,Failure,Compilation Failed
