In [1]:
# --- Imports and environment setup ---

import os
import time
import subprocess
from typing import Dict, Tuple, List, Optional

from dotenv import load_dotenv
from litellm import completion, completion_cost  # LLM call + cost helper


In [2]:
# --- Load API keys from .env (do not commit .env to GitHub) ---

load_dotenv(override=True)

openai_api_key = os.getenv("OPENAI_API_KEY")
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
gemini_api_key = os.getenv("GEMINI_API_KEY")

if not any([openai_api_key, anthropic_api_key, gemini_api_key]):
    raise RuntimeError(
        "No API keys found. Please set at least one of: "
        "OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY."
    )



In [None]:
# --- Model configuration and system prompt for code translation ---

models = {
    "gpt":    "gpt-5",
    "claude": "anthropic/claude-opus-4-5-20251101",
    "gemini": "gemini/gemini-2.5-pro", 
}

system_message = (
    "You are an AI code generator. Convert Python code into highly optimized C++ that "
    "compiles successfully with Clang on macOS (Apple Silicon).\n\n"
    "=== HARD RULES ===\n"
    "• Output ONLY valid C++17 source code. No markdown, no backticks, no prose.\n"
    "• DO NOT use '#include <bits/stdc++.h>'. Use portable headers like <iostream>, <iomanip>, <cmath>, <vector>, <algorithm>.\n"
    "• DO NOT use OpenMP (#include <omp.h> or '#pragma omp ...').\n"
    "• DO NOT output shell commands or lines like 'Compile with: ...'.\n\n"
    "=== REQUIREMENTS ===\n"
    "• Code must compile with: clang++ -std=c++17 -Ofast -mcpu=native -flto=thin -DNDEBUG\n"
    "• Numeric results must match the Python code.\n"
    "• Use fast loops, appropriate floating point types, and only standard headers.\n"
)


In [None]:
# --- Prompt construction helpers (Python → C++) ---

def user_prompt_for(python_code: str) -> str:
    return (
        "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output "
        "in the least time. Respond only with valid C++ code; do not include any natural language instructions, "
        "shell commands, or lines like 'Compile with ...'. "
        "Use comments sparingly and only inside the C++ file. "
        "Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ "
        "packages such as <iomanip>.\n\n"
        f"{python_code}"
    )


def messages_for(python_code: str) -> List[Dict[str, str]]:
    """
    Build the messages payload for the chat completion call.
    """
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt_for(python_code)},
    ]


def reasoning_effort_for(model_key: str) -> Optional[str]:
    # You want low for GPT + Claude, none for Gemini
    if model_key in ("gpt", "claude"):
        return "low"
    return None


In [None]:
# --- File helper: write generated C++ to main.cpp ---

def write_output(cpp_code: str, filename: str = "main.cpp") -> None:
    """
    Clean up LLM-generated C++ so it compiles on macOS/Clang:
      - remove markdown fences
      - replace <bits/stdc++.h> with standard headers
      - remove OpenMP includes/pragmas
      - remove 'Compile with ...' / shell-like junk
    """
    # 1) Remove markdown code fences if any
    clean = cpp_code.replace("```cpp", "").replace("```", "")

    lines_out = []
    for line in clean.splitlines():
        stripped = line.strip()

        # 2) Replace GCC-only header with portable includes
        if stripped.startswith("#include <bits/stdc++.h>"):
            lines_out.append("#include <iostream>")
            lines_out.append("#include <iomanip>")
            lines_out.append("#include <cmath>")
            lines_out.append("#include <vector>")
            lines_out.append("#include <algorithm>")
            continue

        # 3) Drop OpenMP (we're not linking libomp for now)
        if "<omp.h>" in stripped:
            continue
        if stripped.startswith("#pragma omp"):
            continue

        # 4) Drop human instructions / compile hints
        if stripped.startswith("Compile with:") or "clang++" in stripped:
            continue

        lines_out.append(line)

    with open(filename, "w") as f:
        f.write("\n".join(lines_out))


In [None]:
# --- LLM call: return C++ code + cost (USD) + latency (seconds) ---

def optimize_llm(python_code: str, model_key: str) -> Tuple[str, float, float]:
    if model_key not in models:
        raise ValueError(f"Unknown model key: {model_key}")

    kwargs: Dict[str, object] = {}
    effort = reasoning_effort_for(model_key)
    if effort is not None:
        kwargs["reasoning_effort"] = effort

    start = time.perf_counter()
    resp = completion(
        model=models[model_key],
        messages=messages_for(python_code),
        **kwargs,
    )
    latency = time.perf_counter() - start

    cpp_code = resp["choices"][0]["message"]["content"]
    cost = completion_cost(completion_response=resp)

    return cpp_code, cost, latency



In [7]:
# --- Baseline: run the original Python code and time it ---

def run_python_and_time(code: str) -> float:
    """
    Execute the Python snippet and return wall-clock execution time in seconds.
    The snippet itself may also print its own timing info.
    """
    globals_dict = {"__builtins__": __builtins__}
    start = time.perf_counter()
    exec(code, globals_dict)
    end = time.perf_counter()
    return end - start

In [8]:
# --- C++ compile and run commands (clang++ on main.cpp) ---

compile_command = [
    "clang++", "main.cpp", "-o", "main",
    "-std=c++17",
    "-Ofast",
    "-mcpu=native",
    "-flto=thin",
    "-fvisibility=hidden",
    "-DNDEBUG",
    "-Xpreprocessor", "-fopenmp",
    "-lomp"
]

run_command = ["./main"]


In [9]:
# --- Single-model experiment: Python baseline → LLM → C++ → compile → run ---

def run_experiment(
    model_key: str,
    python_code: str,
    repeats: int = 1,
    python_time: Optional[float] = None,
) -> None:
    """
    For a single model:
      1) Use provided python_time if given, otherwise measure it.
      2) Use LLM to generate C++ from Python.
      3) Compile C++ into ./main.
      4) Run the C++ binary 'repeats' times.
      5) For each run, print:
         - model name
         - C++ program output (result + C++ internal timing)
         - speedup vs Python baseline
         - LLM generation cost and latency
    """
    # 1) Python baseline (measure here only if not provided)
    if python_time is None:
        python_time = run_python_and_time(python_code)
        print(f"Python baseline execution time: {python_time:.6f} seconds\n")

    # 2) Generate C++ via LLM
    cpp_code, cost_usd, llm_latency = optimize_llm(
        python_code=python_code,
        model_key=model_key,
    )
    write_output(cpp_code)

    # 3) Compile C++
    compile_proc = subprocess.run(
        compile_command,
        check=False,
        text=True,
        capture_output=True,
    )
    if compile_proc.returncode != 0:
        print(f"Compilation failed for model '{model_key}':")
        print(compile_proc.stderr)
        return

    # 4) Run C++ binary
    for i in range(repeats):
        start_cpp = time.perf_counter()
        run_proc = subprocess.run(
            run_command,
            check=False,
            text=True,
            capture_output=True,
        )
        cpp_time = time.perf_counter() - start_cpp

        if run_proc.returncode != 0:
            print(f"Run {i + 1} failed for model '{model_key}':")
            print(run_proc.stderr)
            continue

        improvement = (python_time / cpp_time) if cpp_time > 0 else float("inf")

        print(f"Run {i + 1} output:")
        print(f'Model: "{model_key}"')
        # C++ program is expected to print "Result: ..." and "Execution Time: ..."
        print(run_proc.stdout.strip())
        print(f"Improvement: {improvement:.2f}x faster than Python")
        print(f"Cost (LLM generation): ${cost_usd:.6f}")
        print(f"LLM generation latency: {llm_latency:.3f} seconds\n")

In [10]:
# --- Multi-model driver: run one model or all models ---

def run_for_model_or_all(model_key: str, python_code: str, repeats: int = 1) -> None:
    """
    If model_key == 'all':
        - Run Python baseline once.
        - For each model in 'models', generate C++, compile, and run once.
    Otherwise:
        - Run only the specified model.
    """
    if model_key == "all":
        # Measure Python baseline once for all models
        python_time = run_python_and_time(python_code)
        print(f"Python baseline execution time: {python_time:.6f} seconds\n")

        for key in models.keys():
            print(f"=== Model: {key} ===")
            try:
                run_experiment(
                    model_key=key,
                    python_code=python_code,
                    repeats=repeats,
                    python_time=python_time,
                )
            except Exception as e:
                print(f"Error running model '{key}': {e}\n")
    else:
        # Single model; run_experiment will measure Python baseline itself
        run_experiment(
            model_key=model_key,
            python_code=python_code,
            repeats=repeats,
        )

In [11]:
# --- Example Python code: π approximation via series ---

pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [12]:
# --- Run the full pipeline: choose one model or 'all' ---

# Example 1: run all models once
run_for_model_or_all("all", pi, repeats=1)

# Example 2: run only GPT once
# run_for_model_or_all("gpt", pi, repeats=1)

# Example 3: run only Claude or Gemini
# run_for_model_or_all("claude", pi, repeats=1)
# run_for_model_or_all("gemini", pi, repeats=1)

Result: 3.141592658589
Execution Time: 12.104728 seconds
Python baseline execution time: 12.105158 seconds

=== Model: gpt ===
Compilation failed for model 'gpt':
main.cpp:1:10: fatal error: 'bits/stdc++.h' file not found
    1 | #include <bits/stdc++.h>
      |          ^~~~~~~~~~~~~~~
1 error generated.

=== Model: claude ===
Compilation failed for model 'claude':
ld: library 'omp' not found
clang++: error: linker command failed with exit code 1 (use -v to see invocation)

=== Model: gemini ===
Compilation failed for model 'gemini':
ld: library 'omp' not found
clang++: error: linker command failed with exit code 1 (use -v to see invocation)

