In [None]:
import os
import subprocess
from pathlib import Path
import json
from pydantic import BaseModel
import nest_asyncio
import shutil
import logfire
import csv

from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.models.groq import GroqModel

from great_tables import GT, style, loc
import pandas as pd

from dotenv import load_dotenv

# Initialise Logfire instrumentation for this notebook session.
# NOTE(review): this runs before load_dotenv() below, so a Logfire token kept
# only in .env would presumably not be visible at this point — confirm.
logfire.configure()
# Presumably required so the synchronous agent calls made later in the notebook
# can run inside Jupyter's already-running event loop — TODO confirm.
nest_asyncio.apply()
# Load variables from a .env file, letting them override values already present
# in the process environment; the provider API keys are read later via os.getenv.
# As the last expression of the cell, its return value is what the cell displays.
load_dotenv(override=True)

True

[1mLogfire[0m project URL: ]8;id=219169;https://logfire.pydantic.dev/prayash/hpc4llm\[4;36mhttps://logfire.pydantic.dev/prayash/hpc4llm[0m]8;;\


In [43]:
# Location of the CUDA 12.4 toolkit that supplies nvcc and its runtime libraries.
CUDA_HOME = "/usr/local/cuda-12.4"


def _prepend_env_path(var_name: str, entry: str) -> None:
    """Make `entry` the first component of the path-list variable `var_name`.

    Existing copies of `entry` and empty components are dropped. This keeps the
    cell idempotent (re-running it does not grow the variable) and avoids the
    trailing separator that plain string concatenation produces when the
    variable was unset; an empty component is interpreted as the current
    directory by path lookups.

    Args:
        var_name: Name of the environment variable to update (e.g. "PATH").
        entry: Directory to place at the front of the list.
    """
    current = os.environ.get(var_name, "")
    kept = [part for part in current.split(os.pathsep) if part and part != entry]
    os.environ[var_name] = os.pathsep.join([entry, *kept])


_prepend_env_path("PATH", f"{CUDA_HOME}/bin")
_prepend_env_path("LD_LIBRARY_PATH", f"{CUDA_HOME}/lib64")

# Fail loudly (but do not raise) if the compiler is still not discoverable.
nvcc_path = shutil.which("nvcc")
if nvcc_path is None:
    print("[ERROR] nvcc not found in PATH. Please ensure that nvcc is installed and its directory is added to the PATH environment variable.")
else:
    print("nvcc found at:", nvcc_path)

nvcc found at: /usr/local/cuda-12.4/bin/nvcc


In [44]:
def clean_generated_code(text: str) -> str:
    """Extract the code from a model reply, removing markdown code fences.

    The first fenced block found in `text` is returned without its opening
    fence line (whatever language tag it carries, e.g. ```cuda) and without
    its closing fence. Unlike a plain prefix check, this also works when the
    model adds commentary before the opening fence or after the closing one.

    Args:
        text: Raw text returned by the model.

    Returns:
        The stripped contents of the first fenced block. If `text` contains no
        line starting with a fence, the stripped text is returned unchanged.
        If the opening fence is never closed, everything after it is returned.
    """
    cleaned = text.strip()
    lines = cleaned.splitlines()

    # Locate the opening fence: the first line that begins with three backticks.
    opening = next(
        (idx for idx, line in enumerate(lines) if line.lstrip().startswith("```")),
        None,
    )
    if opening is None:
        return cleaned

    body = lines[opening + 1:]
    # The closing fence is a line consisting solely of three backticks; when it
    # is missing, keep everything up to the end of the reply.
    closing = next(
        (idx for idx, line in enumerate(body) if line.strip() == "```"),
        len(body),
    )
    return "\n".join(body[:closing]).strip()

In [45]:
# Instruction sent verbatim to every model: produce a self-contained CUDA
# "hello" program and reply with nothing but a single ```cuda fenced block.
prompt = "\n".join(
    [
        "You are an expert CUDA code generator. Generate a complete and valid CUDA program that launches a kernel where each thread prints:",
        "\"Hello from CUDA thread X!\" (where X is the thread index).",
        "The program must be self-contained, include a main() function, and be compilable with nvcc (version 12.4, Build cuda_12.4.r12.4/compiler.34097967_0).",
        "",
        "Output must follow this exact format, with no additional commentary or instructions:",
        "",
        "```cuda",
        "// Your complete CUDA code here",
        "```",
        "",
        "Output only the code block as shown above.",
    ]
)

In [None]:
# Directory that receives the generated .cu sources and the compiled binaries.
generated_dir = Path("data/generated_cuda/integration_check")
generated_dir.mkdir(parents=True, exist_ok=True)

# One row per provider: (model class, env var holding the API key, model names).
# The resulting dict keeps this order, top to bottom and left to right.
_PROVIDERS = [
    # OpenAI models
    (OpenAIModel, "OPENAI_API_KEY", ["gpt-4o-mini", "o1-mini"]),
    # Anthropic models
    (AnthropicModel, "ANTHROPIC_API_KEY", ["claude-3-5-sonnet-latest", "claude-3-5-haiku-latest"]),
    # Gemini models ("gemini-2.0-flash" is currently disabled)
    (GeminiModel, "GEMINI_API_KEY", ["gemini-1.5-flash"]),
    # Opensource models ("deepseek-r1-distill-qwen-32b" is currently disabled)
    (GroqModel, "GROQ_API_KEY", ["llama-3.3-70b-versatile", "qwen-2.5-32b"]),
]

# Map each model name to an Agent wrapping that provider's model object.
agents = {
    model_id: Agent(model=model_cls(model_id, api_key=os.getenv(key_var)))
    for model_cls, key_var, model_ids in _PROVIDERS
    for model_id in model_ids
}

print("Agents configured:")
for configured_name in agents:
    print(" -", configured_name)

Agents configured:
 - gpt-4o-mini
 - o1-mini
 - claude-3-5-sonnet-latest
 - claude-3-5-haiku-latest
 - gemini-1.5-flash
 - gemini-2.0-flash
 - llama-3.3-70b-versatile
 - qwen-2.5-32b
 - deepseek-r1-distill-qwen-32b


In [47]:
# Run the generate -> save -> compile -> execute pipeline once per configured
# model and collect one summary row per model in `results`.
#
# Each row has the keys "Model", "Compilation" ("Success"/"Failure") and
# "Execution Output" (program output, or a short failure label).
results = []

for model_name, agent in agents.items():
    print(f"\n--- Running integration test for {model_name} ---")

    # Step 1: ask the model for code. Any failure here is reported and leaves
    # an empty source so the later steps can be skipped.
    try:
        result = agent.run_sync(prompt)
        # Extract the generated text (whether structured or plain text)
        code = result.data if hasattr(result, "data") else result
        cleaned_code = clean_generated_code(code)
    except Exception as e:
        print(f"[ERROR] Code generation failed for {model_name}: {e}")
        cleaned_code = ""

    # Step 2: save the generated CUDA code. The file is written even when it is
    # empty so that a stale source from an earlier run is never left behind.
    code_file = generated_dir / f"integrationTest_{model_name}.cu"
    with open(code_file, "w", encoding="utf-8") as f:
        f.write(cleaned_code)

    # Binary name now uses the same "integrationTest_" prefix as the source file.
    binary_file = generated_dir / f"integrationTest_{model_name}.out"
    compile_cmd = ["nvcc", "-O3", str(code_file), "-o", str(binary_file)]

    # Pessimistic defaults; only overwritten by the step that actually succeeds
    # or fails, so a run-time problem can no longer relabel a good compilation.
    compile_status = "Failure"
    exec_output = "Error"

    if not cleaned_code:
        # Nothing to compile: report the real cause instead of invoking nvcc on
        # an empty file and blaming the compiler.
        exec_output = "Generation Failed"
    else:
        # Step 3: compile with nvcc.
        try:
            compile_result = subprocess.run(compile_cmd, capture_output=True, text=True, timeout=30)
        except Exception as e:
            print(f"[ERROR] Exception during compile/run for {model_name}: {e}")
        else:
            if compile_result.returncode != 0:
                print(f"[ERROR] Compilation failed for {model_name}:\n{compile_result.stderr}")
                exec_output = "Compilation Failed"
            else:
                compile_status = "Success"
                # Step 4: run the compiled binary. Handled separately from the
                # compile step so a timeout or launch error here keeps
                # compile_status == "Success".
                try:
                    run_result = subprocess.run([str(binary_file)], capture_output=True, text=True, timeout=30)
                    # Prefer stdout; fall back to stderr when stdout is empty.
                    exec_output = run_result.stdout.strip() if run_result.stdout else run_result.stderr.strip()
                except Exception as e:
                    print(f"[ERROR] Exception during compile/run for {model_name}: {e}")

    # Record the result for this model
    results.append({
        "Model": model_name,
        "Compilation": compile_status,
        "Execution Output": exec_output
    })


--- Running integration test for gpt-4o-mini ---
19:29:46.294 agent run prompt=You are an expert CUDA code generator. Generate a complete and...CUDA code here
```

Output only the code block as shown above.
19:29:46.294   preparing model request params run_step=1
19:29:46.295   model request
19:29:47.631   handle model response

--- Running integration test for o1-mini ---
19:29:48.186 agent run prompt=You are an expert CUDA code generator. Generate a complete and...CUDA code here
```

Output only the code block as shown above.
19:29:48.187   preparing model request params run_step=1
19:29:48.188   model request
19:29:54.404   handle model response

--- Running integration test for claude-3-5-sonnet-latest ---
19:29:54.969 agent run prompt=You are an expert CUDA code generator. Generate a complete and...CUDA code here
```

Output only the code block as shown above.
19:29:54.970   preparing model request params run_step=1
19:29:54.970   model request
19:29:56.947   handle model respons

In [48]:
# Persist the per-model result rows to disk as CSV, creating the target
# directory first if it does not exist yet.
results_csv = Path("data/test_data/integration_results.csv")
results_csv.parent.mkdir(parents=True, exist_ok=True)

column_names = ["Model", "Compilation", "Execution Output"]
with results_csv.open("w", newline="") as out_handle:
    table_writer = csv.DictWriter(out_handle, fieldnames=column_names)
    table_writer.writeheader()
    for row in results:
        table_writer.writerow(row)

print(f"\nResults saved to {results_csv}")


Results saved to data/test_data/integration_results.csv


In [49]:
def _summarize_output(value):
    """Shorten long multi-line output to its first and last three lines.

    The value is split on newlines via its string form. When that yields more
    than six lines, the first three and last three are kept with a "..." line
    between them; otherwise the original value is returned untouched.
    """
    parts = str(value).split("\n")
    if len(parts) <= 6:
        return value
    return "\n".join(parts[:3] + ["..."] + parts[-3:])


# Reload the saved results and condense the execution output column.
df = pd.read_csv(results_csv)
df["Execution Output"] = df["Execution Output"].apply(_summarize_output)

# Display the updated table with summarized output
gt_table = GT(df)
gt_table.show()

Model,Compilation,Execution Output
gpt-4o-mini,Success,Hello from CUDA thread 0! Hello from CUDA thread 1! Hello from CUDA thread 2! ... Hello from CUDA thread 7! Hello from CUDA thread 8! Hello from CUDA thread 9!
o1-mini,Success,Hello from CUDA thread 0! Hello from CUDA thread 1! Hello from CUDA thread 2! ... Hello from CUDA thread 7! Hello from CUDA thread 8! Hello from CUDA thread 9!
claude-3-5-sonnet-latest,Success,Hello from CUDA thread 0! Hello from CUDA thread 1! Hello from CUDA thread 2! ... Hello from CUDA thread 29! Hello from CUDA thread 30! Hello from CUDA thread 31!
claude-3-5-haiku-latest,Success,Hello from CUDA thread 0! Hello from CUDA thread 1! Hello from CUDA thread 2! ... Hello from CUDA thread 93! Hello from CUDA thread 94! Hello from CUDA thread 95!
gemini-1.5-flash,Success,Hello from CUDA thread 128! Hello from CUDA thread 129! Hello from CUDA thread 130! ... Hello from CUDA thread 125! Hello from CUDA thread 126! Hello from CUDA thread 127!
gemini-2.0-flash,Failure,Compilation Failed
llama-3.3-70b-versatile,Success,Hello from CUDA thread 0! Hello from CUDA thread 1! Hello from CUDA thread 2! ... Hello from CUDA thread 7! Hello from CUDA thread 8! Hello from CUDA thread 9!
qwen-2.5-32b,Success,Hello from CUDA thread 128! Hello from CUDA thread 129! Hello from CUDA thread 130! ... Hello from CUDA thread 125! Hello from CUDA thread 126! Hello from CUDA thread 127!
deepseek-r1-distill-qwen-32b,Failure,Compilation Failed
