## **Comparing an Open-Source and a Frontier Model for Coding Task !!**

In [1]:
import os
import requests
from dotenv import load_dotenv
from openai import OpenAI
import sys
import io
import subprocess
import gradio as gr
from IPython.display import Markdown, display, update_display



from huggingface_hub import login, InferenceClient
from transformers import AutoTokenizer  

In [2]:
hf_token = os.getenv("HF_TOKEN")
login(hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
load_dotenv(override=True)

True

#### **System and User messages !!**

In [4]:
system_message = "You are an assistant that reimplements Python code in high performance C++ code for an Windows 11 x64 PC."
system_message += "Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments."
system_message += "The C++ response needs to produce an identical output in the fastest possible time."
system_message += "Keep implementations of random number generators identical so that results match exactly."


def user_promt_for(python):
    user_prompt = "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time."
    user_prompt += "Respond only with C++ code; do not explain your work other thanb a few comments."
    user_prompt += "Pay attention to the number types to ensure no int overflows. Remember to #include all necessary C++ packages such as <iomanip>.\n\n"
    user_prompt += python
    return user_prompt


def messages_for(python):
    return [
        {'role': 'system', 'content': system_message}, 
        {'role': 'user', 'content': user_promt_for(python)}
    ]

In [5]:
# write to a file called optimized.cpp

def write_output(cpp):
    code = cpp.replace("```cpp", "").replace("```", "")
    with open("optimized.cpp", "w") as f:
        f.write(code)

In [None]:
# Simple Python code

pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [None]:
# Hard Python code

python_hard = """# Be careful to support large number sizes

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value
        
def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""

In [8]:
CODE_QWEN = "Qwen/Qwen2.5-Coder-32B-Instruct"
GPT_MODEL = "gpt-4.1-mini"


---

> In this case, we are using `chat_completions_api`, so we dont need to convert messages into templates using the tokenizer.

> We can directly pass the `messages`(system and user) to the model.

> Tokenization, for now, is just for reference in case u work with a `text_generation_api`

In [9]:
tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN)
messages = messages_for(pi) 
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

---

In [None]:

# Testing out the Open-Source Model

client = InferenceClient(
    provider="nebius",
    api_key=os.environ["HF_TOKEN"],
)

stream = client.chat.completions.create(
    model=CODE_QWEN,
    messages=messages, 
    stream=True,
    max_tokens=3000
)

full_response = ""
for chunk in stream:
    if chunk.choices[0].delta.content:
        token = chunk.choices[0].delta.content
        print(token, end="", flush=True)
        full_response += token

```cpp
#include <iostream>
#include <iomanip>
#include <chrono>

double calculate(int iterations, int param1, int param2) {
    double result = 1.0;
    for (int i = 1; i <= iterations; ++i) {
        long long j = static_cast<long long>(i) * param1 - param2;
        result -= 1.0 / j;
        j = static_cast<long long>(i) * param1 + param2;
        result += 1.0 / j;
    }
    return result;
}

int main() {
    auto start_time = std::chrono::high_resolution_clock::now();
    double result = calculate(100000000, 4, 1) * 4;
    auto end_time = std::chrono::high_resolution_clock::now();

    std::cout << std::fixed << std::setprecision(12);
    std::cout << "Result: " << result << std::endl;

    std::chrono::duration<double> duration = end_time - start_time;
    std::cout << "Execution Time: " << duration.count() << " seconds" << std::endl;

    return 0;
}
```

In [None]:
# Code conversion using 'Qwen2.5-Coder-32B-Instruct'

def stream_code_qwen(python):
    messages = messages_for(python)
    client = InferenceClient(provider="nebius", api_key=os.getenv("HF_TOKEN"))
    stream = client.chat.completions.create(
        model=CODE_QWEN, 
        messages=messages,
        stream=True,
        max_tokens=3000
    )
    full_response = ""
    for chunk in stream:
        fragment = chunk.choices[0].delta.content or ""
        # full_response += fragment
        # print(fragment, end="", flush=True)
        yield fragment

    # return full_response

In [None]:
# Code conversion using 'gpt-4.1-mini'

def stream_gpt(python):
    messages = messages_for(python)
    openai = OpenAI()
    stream = openai.chat.completions.create(model=GPT_MODEL, messages=messages, stream=True, max_tokens=3000)

    # reply=""
    for chunk in stream:
        fragment = chunk.choices[0].delta.content or ""
        # reply += fragment
        # print(fragment, end="", flush=True)
        yield fragment
        
    # return reply

In [None]:
# To convert the code using any of the two models

def optimize(python, model):
    response = ""
    if model == "GPT":
        for chunk in stream_gpt(python):
            response += chunk
            yield response
    elif model == "CodeQwen":
        for chunk in stream_code_qwen(python):
            response += chunk
            yield response
    else:
        raise ValueError("Unknown Model")

In [17]:
# To execute Python code

def execute_python(code):
    try:
        output = io.StringIO()
        sys.stdout = output
        exec(code)
    finally:
        sys.stdout = sys.__stdout__
    return output.getvalue()


# To execute C++ code

def execute_cpp(code):
    write_output(code)
    try:
        compile_cmd = [
            "g++", "-Ofast", "-std=c++17", "-march=native",
            "-o", "optimized.exe", "optimized.cpp"
        ]
        compile_result = subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
        
        run_cmd = ["optimized.exe"]
        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)
        
        return run_result.stdout
    except subprocess.CalledProcessError as e:
        return f"An error occurred:\n{e.stderr}"

#### **UI time!!**

In [18]:
css = """
.python {background-color: #306998;}
.cpp {background-color: #050;}
"""

In [None]:
with gr.Blocks(css=css) as ui:
    gr.Markdown("## Convert Code from Python to C++")
    with gr.Row():
        python = gr.Textbox(label="Python Code", value=pi, lines=10)
        cpp = gr.Textbox(label="C++ Code", lines=10)
    with gr.Row():
        model = gr.Dropdown(["GPT", "CodeQwen"], label="Select Model", value="GPT")
    with gr.Row():
        convert = gr.Button("Convert Code")
    with gr.Row():
        python_run = gr.Button("Run Python")
        cpp_run = gr.Button("Run C++")
    with gr.Row():
        python_out = gr.TextArea(label="Python Result: ", elem_classes=["python"])
        cpp_out = gr.TextArea(label="C++ Result: ", elem_classes=["cpp"])

    convert.click(optimize, inputs=[python, model], outputs=[cpp])
    python_run.click(execute_python, inputs=[python], outputs=[python_out])
    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])

ui.launch(inbrowser=True)

> The Open-Source model, `Qwen2.5-Coder-32B-Instruct`, which is for now on top of the leaderboard, coudn't convert the `python_hard` code into a working C++ Code. 
---
> However, the frontier model, `gpt-4.1-mini` did it successfully.