# C++ Code Generator

The requirement: use a frontier model and an open-source model to generate high-performance C++ code from Python code.

To replicate this, you'll need to set up a HuggingFace endpoint. It's simple to do, and it's quite satisfying to see the results!

In [1]:
# imports

import os
import io
import sys
import json
import requests
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess
from huggingface_hub import login, InferenceClient
from transformers import AutoTokenizer

In [2]:
# environment

load_dotenv(override=True)

# Re-export each credential into the process environment, storing a placeholder
# when the variable is not defined. To get your HuggingFace token (HF_TOKEN),
# create an account on the HuggingFace website and generate a token there.
for key_name in ('OPENAI_API_KEY', 'GOOGLE_API_KEY', 'GROQ_API_KEY', 'HF_TOKEN'):
    os.environ[key_name] = os.getenv(key_name, 'your-key-if-not-using-env')

# The Groq key is also kept in a variable because it is passed explicitly to the client.
groq_api_key = os.getenv('GROQ_API_KEY')

In [3]:
# initialize

# Client setup for the hosted APIs.
# NOTE(review): both calls take no arguments here, so they presumably read their API keys
# from the environment variables exported in the environment cell — confirm against the SDK docs.
openai = OpenAI()
google.generativeai.configure()
# Model identifiers used by stream_gpt, stream_gemini and stream_groq respectively.
OPENAI_MODEL = "gpt-4o"
GOOGLE_MODEL = "gemini-2.0-flash-exp"
GROQ_MODEL = "llama-3.3-70b-versatile"


In [6]:
# System prompt shared by every model backend (sent via messages_for / system_instruction).
system_message = (
    "You are an assistant that reimplements Python code in high performance C++ for PC under Windows. "
    "Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. "
    "The C++ response needs to produce an identical output in the fastest possible time. "
    "Keep implementations of random number generators identical so that results match exactly."
)

In [7]:
def user_prompt_for(python):
    """Return the user prompt: the conversion instructions followed by the Python source.

    Args:
        python: Python source code (str) to be rewritten in C++.

    Returns:
        The instruction text, a blank line, then `python` appended verbatim.
    """
    instructions = (
        "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. "
        "Respond only with C++ code; do not explain your work other than a few comments. "
        "Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\n\n"
    )
    return instructions + python

In [8]:
def messages_for(python):
    """Build the two-message chat (system prompt + user prompt) for converting `python` to C++.

    Returns:
        A list of two dicts with "role" and "content" keys: the system turn first, then the user turn.
    """
    system_turn = dict(role="system", content=system_message)
    user_turn = dict(role="user", content=user_prompt_for(python))
    return [system_turn, user_turn]

In [9]:
# Save generated C++ to a file called optimized.cpp

def write_output(cpp):
    """Remove Markdown code-fence markers from `cpp` and write the rest to optimized.cpp.

    The file is created in (or overwritten in) the current working directory.
    """
    stripped = cpp
    # The longer marker must be removed first, otherwise "cpp" would be left behind.
    for fence in ("```cpp", "```"):
        stripped = stripped.replace(fence, "")
    with open("optimized.cpp", "w") as out_file:
        out_file.write(stripped)

In [10]:
# Sample program 1: Python source (kept as a string) that approximates pi with an
# alternating series over 100 million iterations and prints the result and the elapsed time.
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [11]:
# Run the Python baseline. exec() at cell level runs in the notebook namespace, so
# calculate, start_time, end_time and result become notebook globals afterwards.
exec(pi)

Result: 3.141592658589
Execution Time: 33.201466 seconds


In [12]:
# Sample program 2: Python source (kept as a string) that sums the brute-force maximum
# subarray sum over 20 runs of numbers drawn from a hand-written LCG generator.
python_hard = """# Be careful to support large number sizes

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value
        
def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""

In [13]:
# Run the Python baseline. exec() at cell level runs in the notebook namespace, so
# lcg, max_subarray_sum, total_max_subarray_sum, result, etc. become notebook globals.
exec(python_hard)

Total Maximum Subarray Sum (20 runs): 10980
Execution Time: 98.249988 seconds


In [21]:
# Sample program 3: Python source (kept as a string; its own comments and messages are in French)
# that generates a 1,000,000-digit pseudo-random string and finds the largest product of
# 13 consecutive digits with a sliding window.
high_k_produit = """# Génère une très longue chaîne de chiffres pseudo-aléatoires
def generate_large_digit_string(length, seed):
    import random
    random.seed(seed)
    return ''.join(str(random.randint(0, 9)) for _ in range(length))

# Calcule le plus grand produit de k chiffres consécutifs
def max_product_in_series(series, k):
    max_product = 0
    current_product = 1
    zero_count = 0

    # Initialisation
    for i in range(k):
        digit = int(series[i])
        if digit == 0:
            zero_count += 1
        else:
            current_product *= digit

    if zero_count == 0:
        max_product = current_product

    # Parcours de la série
    for i in range(k, len(series)):
        exiting = int(series[i - k])
        entering = int(series[i])

        if exiting == 0:
            zero_count -= 1
        else:
            current_product //= exiting

        if entering == 0:
            zero_count += 1
        else:
            current_product *= entering

        if zero_count == 0 and current_product > max_product:
            max_product = current_product

    return max_product

# Paramètres
length = 1000000     # Longueur de la chaîne (1 million de chiffres)
k = 13               # Nombre de chiffres consécutifs
seed = 42            # Graine pour la reproductibilité

import time
print("Génération d'une grande série numérique...")
start = time.time()
series = generate_large_digit_string(length, seed)
print("Série générée en {:.3f} s".format(time.time() - start))

print(f"Recherche du plus grand produit de {k} chiffres consécutifs...")
start = time.time()
result = max_product_in_series(series, k)
print("Produit maximum trouvé :", result)
print("Temps de calcul : {:.6f} secondes".format(time.time() - start))"""


In [22]:
# Run the Python baseline. exec() at cell level runs in the notebook namespace, so
# length, k, seed, series, start and result become (or overwrite) notebook globals.
exec(high_k_produit)

Génération d'une grande série numérique...
Série générée en 2.616 s
Recherche du plus grand produit de 13 chiffres consécutifs...
Produit maximum trouvé : 258096513024
Temps de calcul : 1.316650 secondes


In [14]:
## OpenAI model
def stream_gpt(python):
    """Stream the C++ translation of `python` from the OpenAI chat model.

    Yields:
        The reply accumulated so far after each chunk, with the Markdown
        fence markers removed.
    """
    response = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    so_far = ""
    for event in response:
        delta = event.choices[0].delta.content
        so_far += delta or ""  # a chunk may carry no text
        yield so_far.replace('```cpp\n','').replace('```','')

In [15]:
## Gemini model
def stream_gemini(python):
    """Stream the C++ translation of `python` from the Gemini model.

    The system prompt is supplied as the model's system instruction and the
    user prompt as the content to generate from.

    Yields:
        The reply accumulated so far after each chunk, with the Markdown
        fence markers removed.
    """
    model = google.generativeai.GenerativeModel(
        model_name=GOOGLE_MODEL,
        system_instruction=system_message,
    )
    so_far = ""
    for part in model.generate_content(user_prompt_for(python), stream=True):
        so_far += part.text or ""
        yield so_far.replace('```cpp\n','').replace('```','')

In [17]:
##  Groq model
def stream_groq(python):
    """Stream the C++ translation of `python` from the Groq-hosted model.

    Groq is reached through the OpenAI client pointed at Groq's
    OpenAI-compatible base URL.

    Yields:
        The reply accumulated so far after each chunk, with the Markdown
        fence markers removed (same contract as stream_gpt and stream_gemini).
    """
    groq = OpenAI(api_key=groq_api_key, base_url="https://api.groq.com/openai/v1")
    stream = groq.chat.completions.create(
        model=GROQ_MODEL,
        messages=messages_for(python),
        stream=True
    )
    reply = ""
    for chunk in stream:
        reply += chunk.choices[0].delta.content or ""
        # Yield the text built in this function; `result` is not a local name here
        # and would resolve to whatever unrelated global the notebook holds.
        yield reply.replace('```cpp\n','').replace('```','')

In [18]:
# Setting environment to use HuggingFace open source models
# HF_TOKEN was exported by the environment cell; hf_token is reused for the InferenceClient calls.
hf_token = os.environ['HF_TOKEN']
# NOTE(review): add_to_git_credential=True presumably also stores the token in the git
# credential helper — confirm against the huggingface_hub documentation.
login(hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [4]:
# HuggingFace model ids; also used to load the matching tokenizers.
code_qwen = "Qwen/CodeQwen1.5-7B-Chat"
star_coder = "bigcode/starcoder2-7b"

#code_gemma = "google/codegemma-7b-it"

## The endpoint links
## The endpoint must be obtained from HuggingFace and should be activated before running.
CODE_QWEN_URL = "https://omifz2mbs0p1jtzw.us-east4.gcp.endpoints.huggingface.cloud"
CODE_STAR_CODER_URL = "https://vv7455wo30sk1vkc.us-east4.gcp.endpoints.huggingface.cloud"
# Backward-compatible alias: existing cells refer to the endpoint by this misspelled name.
CODE_STAR_CDOE_URL = CODE_STAR_CODER_URL
#CODE_GEMMA_URL = "https://c5hggiyqachmgnqg.us-east-1.aws.endpoints.huggingface.cloud"

In [24]:
# Using model via endpoint
# Build the raw prompt locally: the tokenizer's chat template turns the message list into
# one string (tokenize=False returns text, not token ids) ending with the generation prompt.
tokenizer = AutoTokenizer.from_pretrained(code_qwen)
messages = messages_for(pi)
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [25]:
text

'<|im_start|>system\nYou are an assistant that reimplements Python code in high performance C++ for PC under Windows. Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. The C++ response needs to produce an identical output in the fastest possible time. Keep implementations of random number generators identical so that results match exactly.<|im_end|>\n<|im_start|>user\nRewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. Respond only with C++ code; do not explain your work other than a few comments. Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\n\n\nimport time\n\ndef calculate(iterations, param1, param2):\n    result = 1.0\n    for i in range(1, iterations+1):\n        j = i * param1 - param2\n        result -= (1/j)\n        j = i * param1 + param2\n        result += (1/j)\

In [None]:
# Stream the completion for `text` from the CodeQwen endpoint, echoing tokens as they arrive.
client = InferenceClient(CODE_QWEN_URL, token=hf_token)
stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)
result = ""
for r in stream:
    result += r.token.text
    # Print only the new token; printing `result` would re-emit the whole reply on every step.
    print(r.token.text, end = "")

In [28]:
## CodeQwen model
def stream_code_qwen(python):
    """Stream the C++ translation of `python` from the CodeQwen inference endpoint.

    The chat messages are rendered to a single prompt string with the model's
    own chat template before being sent to the endpoint.

    Yields:
        The generated text accumulated so far after each token (returned as-is,
        fence markers are not removed).
    """
    prompt = AutoTokenizer.from_pretrained(code_qwen).apply_chat_template(
        messages_for(python), tokenize=False, add_generation_prompt=True
    )
    endpoint = InferenceClient(CODE_QWEN_URL, token=hf_token)
    generated = ""
    for event in endpoint.text_generation(prompt, stream=True, details=True, max_new_tokens=3000):
        generated += event.token.text
        yield generated

In [66]:
## Starcoder model
def stream_star_coder(python):
    """Stream the C++ translation of `python` from the StarCoder inference endpoint.

    The prompt is rendered with the tokenizer's chat template when one is
    available; otherwise the message contents are joined into a plain prompt.

    Yields:
        The generated text accumulated so far after each token (returned as-is,
        fence markers are not removed).
    """
    tokenizer = AutoTokenizer.from_pretrained(star_coder)
    messages = messages_for(python)
    try:
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except ValueError:
        # NOTE(review): base (non-chat) checkpoints may ship without a chat template, in which
        # case transformers raises ValueError; fall back to a plain prompt — confirm for this model.
        text = "\n\n".join(message["content"] for message in messages) + "\n"
    client = InferenceClient(CODE_STAR_CDOE_URL, token=hf_token)
    stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)
    result = ""
    for r in stream:
        result += r.token.text
        yield result


In [67]:
def optimize(python, model):
    """Stream the C++ translation of `python` using the backend selected by `model`.

    Args:
        python: Python source code to convert.
        model: One of "GPT", "Gemini", "Groq", "CodeQwen" or "Star coder".

    Yields:
        Whatever the chosen stream_* generator yields (the reply so far).

    Raises:
        ValueError: if `model` is not one of the known labels (raised when the
            generator is first advanced).
    """
    if model not in ("GPT", "Gemini", "Groq", "CodeQwen", "Star coder"):
        raise ValueError("Unknown model")
    streamers = {
        "GPT": stream_gpt,
        "Gemini": stream_gemini,
        "Groq": stream_groq,
        "CodeQwen": stream_code_qwen,
        "Star coder": stream_star_coder,
    }
    yield from streamers[model](python)

In [68]:
def select_sample_program(sample_program):
    """Return the source of the sample program named `sample_program`.

    Known names are "pi", "python_hard" and "high_k_produit"; any other value
    yields a placeholder text inviting the user to type a program.
    """
    # Thunks keep the lookup lazy: a sample string is only read when its name matches.
    samples = (
        ("pi", lambda: pi),
        ("python_hard", lambda: python_hard),
        ("high_k_produit", lambda: high_k_produit),
    )
    for label, load in samples:
        if sample_program == label:
            return load()
    return "Type your Python program here"

In [36]:
# Detect which C++ compiler is available on the current system (Windows, Linux or macOS).
# The function below returns the platform, the compiler name and the compile command.
import platform

# Visual Studio batch scripts that the compile commands run before invoking `cl`.
# Every backslash is escaped: an unescaped "\T" is an invalid escape sequence that
# newer Python versions warn about.
VISUAL_STUDIO_2022_TOOLS = "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\Tools\\VsDevCmd.bat"
VISUAL_STUDIO_2019_TOOLS = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\VsDevCmd.bat"

# Minimal C++ program used to probe a compiler: a working toolchain must build it
# and the resulting binary must print exactly "Hello".
simple_cpp = """
#include <iostream>

int main() {
    std::cout << "Hello";
    return 0;
}
"""

def run_cmd(command_to_run):
    """Run a command and report its outcome as a string.

    Args:
        command_to_run: Argument list passed to subprocess.run.

    Returns:
        The command's stdout when it succeeds and printed something,
        "SUCCESS" when it succeeds with empty stdout, and "" when it cannot be
        started, exits with a non-zero status, or otherwise fails.
    """
    try:
        run_result = subprocess.run(command_to_run, check=True, text=True, capture_output=True)
    except Exception:
        # Best-effort probe: any ordinary failure counts as "did not work".
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return ""
    return run_result.stdout if run_result.stdout else "SUCCESS"

def _compiler_works(compile_cmd, executable):
    """Return True if `compile_cmd` builds simple.cpp into `executable` and it prints "Hello"."""
    # Remove a stale binary first so an earlier build cannot fake a success.
    if os.path.isfile(executable):
        os.remove(executable)
    return bool(run_cmd(compile_cmd)) and run_cmd([executable]) == "Hello"


def c_compiler_cmd(filename_base):
    """Detect a working C++ compiler and return the command that compiles `filename_base`.cpp.

    A small test program is written to simple.cpp and each candidate toolchain
    for the current platform is tried in order; the first one that builds and
    runs it correctly wins.

    Args:
        filename_base: Source file name without the ".cpp" extension.

    Returns:
        A three-item list [platform, compiler name, compile command]:
        - a compiler was found: e.g. ["Linux", "GCC (g++)", ["g++", ...]];
        - supported platform but no working compiler (or any error during
          detection): [platform.system(), "Unavailable", []];
        - platform other than Windows/Linux/Darwin: ["Unknown", "Unavailable", []].
    """
    my_platform = platform.system()

    try:
        with open("simple.cpp", "w") as f:
            f.write(simple_cpp)

        if my_platform == "Windows":
            # Prefer Visual Studio 2022, then fall back to the 2019 build tools.
            candidates = (
                ("Visual Studio 2022", VISUAL_STUDIO_2022_TOOLS),
                ("Visual Studio 2019", VISUAL_STUDIO_2019_TOOLS),
            )
            for compiler_name, tools in candidates:
                if not os.path.isfile(tools):
                    continue
                # The batch script is run first, then `cl` in the same cmd session.
                prefix = ["cmd", "/c", tools, "&", "cl"]
                if _compiler_works(prefix + ["simple.cpp"], "./simple.exe"):
                    return ["Windows", compiler_name, prefix + [f"{filename_base}.cpp"]]

        elif my_platform == "Linux":
            for compiler_name, binary in (("GCC (g++)", "g++"), ("Clang++", "clang++")):
                if _compiler_works([binary, "simple.cpp", "-o", "simple"], "./simple"):
                    return ["Linux", compiler_name, [binary, f"{filename_base}.cpp", "-o", f"{filename_base}"]]

        elif my_platform == "Darwin":
            # Flags as configured for Apple M1 targets (see -mtune/-mcpu).
            prefix = ["clang++", "-Ofast", "-std=c++17", "-march=armv8.5-a", "-mtune=apple-m1", "-mcpu=apple-m1", "-o"]
            if _compiler_works(prefix + ["simple", "simple.cpp"], "./simple"):
                return ["Macintosh", "Clang++", prefix + [f"{filename_base}", f"{filename_base}.cpp"]]

        else:
            return ["Unknown", "Unavailable", []]
    except Exception:
        # Detection is best-effort: any failure is reported as "no compiler available".
        pass

    return [my_platform, "Unavailable", []]


In [37]:
# Detect the compiler once for optimized.cpp; the UI reads the platform and compiler name from this list.
compiler_cmd = c_compiler_cmd("optimized")

In [38]:
compiler_cmd

['Windows',
 'Visual Studio 2019',
 ['cmd',
  '/c',
  'C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\VsDevCmd.bat',
  '&',
  'cl',
  'optimized.cpp']]

In [56]:
# Kept for the manual compile check cell, which builds its command from this path.
VISUAL_STUDIO_2019_TOOLS = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\VsDevCmd.bat"

def run_cpp(cpp):
    """Write `cpp` to optimized.cpp, compile it with the detected compiler and run it.

    Uses the [platform, compiler name, compile command] list stored in
    `compiler_cmd`, which must have been built for the base name "optimized".

    Args:
        cpp: C++ source code (Markdown fence markers are stripped by write_output).

    Returns:
        The program's stdout ("SUCCESS" if it printed nothing), or a message
        describing why it could not be compiled or run.
    """
    write_output(cpp)
    platform_name, _, compile_cmd = compiler_cmd
    if not compile_cmd:
        return "No C++ compiler is available on this system."

    executable = "./optimized.exe" if platform_name == "Windows" else "./optimized"
    # Remove a stale binary so a failed build cannot run an older program.
    if os.path.isfile(executable):
        os.remove(executable)

    try:
        subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
        run_result = subprocess.run([executable], check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as error:
        # Both streams are reported because the diagnostics may be on either one.
        return f"An error occurred:\n{error.stdout or ''}{error.stderr or ''}"
    except OSError as error:
        # e.g. the compiler or the freshly built executable could not be started.
        return f"An error occurred:\n{error}"
    return run_result.stdout if run_result.stdout else "SUCCESS"


In [54]:
# Manual check: compile optimized.cpp with the Visual Studio 2019 toolchain.
# run_cmd returns "" when the build fails.
compile_cmd = ["cmd", "/c", VISUAL_STUDIO_2019_TOOLS, "&", "cl", "optimized.cpp"]
run_cmd(compile_cmd)



In [44]:
def execute_python(code):
    """Execute Python source and return everything it printed to stdout.

    The code runs in a fresh namespace (with __name__ set to "__main__"), so
    functions it defines can call each other and nothing leaks into or out of
    the notebook's globals. The previous sys.stdout is restored afterwards,
    even if the code raises.

    Args:
        code: Python source code to run.

    Returns:
        The captured stdout text.

    Raises:
        Any exception raised by the executed code propagates to the caller.
    """
    import contextlib  # local import: contextlib is not among the notebook's top-level imports

    output = io.StringIO()
    namespace = {"__name__": "__main__"}
    # SECURITY: exec runs arbitrary code from the UI text box — only use with trusted input.
    with contextlib.redirect_stdout(output):
        exec(code, namespace)
    return output.getvalue()

In [None]:
# Test: run_cpp expects C++ source, so use the known-good probe program (expected output: Hello)
run_cpp(simple_cpp)

In [41]:
# Custom CSS for the UI: background colours for the result panes tagged "python" and "cpp".
css = """
.python {background-color: #306998;}
.cpp {background-color: #050;}
"""

In [69]:
# Gradio UI: Python source on the left, generated C++ on the right, with controls to
# pick a sample program and model, convert the code, and run both versions.
with gr.Blocks(css=css) as ui:
    gr.Markdown("## Convert code from Python to C++")
    # Editors: Python input (pre-filled with the python_hard sample) and C++ output.
    with gr.Row():
        python = gr.Textbox(label="Python code:", value=python_hard, lines=10)
        cpp = gr.Textbox(label="C++ code:", lines=10)
    with gr.Row():
        with gr.Column():
            sample_program = gr.Radio(["pi", "python_hard", "high_k_produit"], label="Sample program", value="python_hard")
            model = gr.Dropdown(["GPT", "Gemini", "Groq", "CodeQwen", "Star coder"], label="Select model", value="GPT")
        with gr.Column():
            # Read-only display of the platform and compiler detected by c_compiler_cmd.
            architecture = gr.Radio([compiler_cmd[0]], label="Architecture", interactive=False, value=compiler_cmd[0])
            compiler = gr.Radio([compiler_cmd[1]], label="Compiler", interactive=False, value=compiler_cmd[1])
    with gr.Row():
        convert = gr.Button("Convert code")
    with gr.Row():
        python_run = gr.Button("Run Python")
        # The C++ button is disabled when no working compiler was detected.
        if not compiler_cmd[1] == "Unavailable":
            cpp_run = gr.Button("Run C++")
        else:
            cpp_run = gr.Button("No compiler to run C++", interactive=False)
    with gr.Row():
        python_out = gr.TextArea(label="Python result:", elem_classes=["python"])
        cpp_out = gr.TextArea(label="C++ result:", elem_classes=["cpp"])

    # Event wiring: each handler receives the listed inputs and writes to the listed outputs.
    sample_program.change(select_sample_program, inputs=[sample_program], outputs=[python])
    convert.click(optimize, inputs=[python, model], outputs=[cpp])
    python_run.click(execute_python, inputs=[python], outputs=[python_out])
    cpp_run.click(run_cpp, inputs=[cpp], outputs=[cpp_out])


In [71]:
# Start the Gradio app. Optional alternative left by the author: ui.launch(inbrowser=True)
ui.launch()

