# Code Generation 
      
## Leveraging Frontier Models for High Performance Code Generation in C++  

- Code with Frontier Models including AI Assistants with Tools
- Build Solutions with open-source LLMs with HuggingFace transformers
- Choose the right LLM for the project backed by metrics


- Assess models for coding ability
- Use Frontier models to generate code
- Build a solution that uses LLMs to generate code 

## Build a product that converts Python code to C++ for performance
#### Frontier Model Solution

Please implement this Python code in C++ with the fastest possible implementation for an M1 MAC (or x64?).     
Only respond with the C++ code. Do not explain your implementation.       
The only requirement is that the C++ code prints the same results and runs faster.    

In [2]:
import time 

def calculate(iterations, param1, param2):
    """Return 1.0 adjusted by an alternating pair of reciprocal terms per step.

    For every i from 1 to ``iterations`` inclusive, subtract
    1 / (i * param1 - param2) and then add 1 / (i * param1 + param2).
    When the first denominator is zero, both terms of that step are skipped.
    """
    total = 1.0
    for step in range(1, iterations + 1):
        lower = step * param1 - param2
        if lower == 0:
            # A zero denominator drops the whole step, not just the first term.
            continue
        total -= 1 / lower
        upper = step * param1 + param2
        total += 1 / upper
    return total

# Time one 100-million-iteration run of calculate() using wall-clock time.
start_time = time.time() 
result = calculate(100_000_000, 4, 4) * 4 
end_time = time.time()   

# Report the scaled result to 12 decimal places and the elapsed time in seconds.
print(f"Result: {result:.12f}") 
print(f"Execution Time: {(end_time - start_time):.6f} seconds")

Result: 2.500000020000
Execution Time: 19.209496 seconds


Check https://www.vellum.ai/llm-leaderboard and https://scale.com/leaderboard to choose an LLM.  
SEAL Coding Leaderboard     
https://scale.com/leaderboard/coding     

| Rank | Model | Score | 95% Confidence |
| --- | --- | --- | --- |  
| 1 |o1-mini | 1247 | +34/-31 |     
| 2 | GPT-4o | 1165 | +34/-31 | 
| 3 | o1-preview | 1146 | +28/-25 | 
| 4 | Gemini Pro Flash 2 | 1130 | +29/-29 |  
| 5 | DeepSeek R1 | 1108 | +33/-32 |

In [3]:
# imports

import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
import ollama 
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess

In [4]:
# environment 

# load_dotenv() comes from python-dotenv; presumably it loads keys from a local .env file
# into the process environment — confirm against your setup.
load_dotenv()   

# Read here but not referenced again in the visible notebook.
google_api_key = os.getenv('GOOGLE_API_KEY')
# Re-export the OpenAI and Anthropic keys, substituting a placeholder string when a variable is unset.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')   

In [5]:
# API clients; no credentials are passed explicitly in these calls.
openai = OpenAI()
claude = anthropic.Anthropic()
google.generativeai.configure()

# Model names for the four backends (OpenAI, Anthropic, Google, Ollama).
OPENAI_MODEL ="gpt-4o-mini"  
CLAUDE_MODEL = "claude-3-5-sonnet-20240620"   
GOOGLE_MODEL = "gemini-2.0-flash"  
OLLAMA_MODEL = "llama3.2:3b"

In [6]:
# System prompt sent with every conversion request: answer with C++ only,
# keep comments sparse, and reproduce the Python output as fast as possible.
system_message = (
    "You are an assistant that reimplements Python code in high performance C++ for an Windows system. "
    "Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. "
    "The C++ response needs to produce an identical output in the fastest possible time."
)

In [7]:
def user_prompt_for(python):
    """Build the user prompt: fixed rewrite instructions followed by the Python source.

    The instructions end with a blank line, after which ``python`` is appended verbatim.
    """
    sections = (
        "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. ",
        "Respond only with C++ code; do not explain your work other than a few comments. ",
        "Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\n\n",
        python,
    )
    return "".join(sections)

In [8]:
def messages_for(python):
    """Return the two-message chat payload (system turn, then user turn) for ``python``."""
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    return [system_turn, user_turn]

In [9]:
# write to a file called optimized.cpp

def write_output(cpp):
    """Strip Markdown code fences from ``cpp`` and save the result as optimized.cpp.

    Removes the opening "```cpp" / "```c++" markers and any bare "```" fence,
    then overwrites ``optimized.cpp`` in the current working directory.
    """
    code = cpp.replace("```cpp", "").replace("```c++", "").replace("```", "")
    # Write UTF-8 explicitly: the locale default (e.g. cp1252 on Windows) can
    # raise UnicodeEncodeError on non-ASCII characters in model output.
    with open("optimized.cpp", "w", encoding="utf-8") as f:
        f.write(code)
      

In [10]:
def optimize_gpt(python):
    """Stream a C++ rewrite of ``python`` from the OpenAI model, echo it, then save it.

    Each streamed fragment is printed as it arrives; the full reply is passed
    to ``write_output`` once the stream is exhausted.
    """
    response = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    pieces = []
    for event in response:
        # A delta may carry no content; treat that as an empty fragment.
        text = event.choices[0].delta.content or ""
        pieces.append(text)
        print(text, end='', flush=True)
    write_output("".join(pieces))

In [11]:
def optimize_claude(python):
    """Stream a C++ rewrite of ``python`` from the Claude model, echo it, then save it.

    The reply is capped at 2000 tokens; text is printed as it streams and the
    complete reply is handed to ``write_output`` afterwards.
    """
    request = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(python)}],
    )
    pieces = []
    with request as events:
        for piece in events.text_stream:
            pieces.append(piece)
            print(piece, end="", flush=True)
    write_output("".join(pieces))

In [12]:
def optimize_google(python):
    """Stream a C++ rewrite of ``python`` from the Gemini model, echo it, then save it.

    Chunks whose text is empty are ignored; the concatenated reply is passed
    to ``write_output`` at the end.
    """
    model = google.generativeai.GenerativeModel(
        model_name=GOOGLE_MODEL,
        system_instruction=system_message,
    )
    response = model.generate_content(user_prompt_for(python), stream=True)

    pieces = []
    for part in response:
        text = part.text
        if not text:
            continue
        pieces.append(text)
        print(text, end="", flush=True)

    write_output("".join(pieces))

In [13]:
def optimize_ollama(python):
    """Stream a C++ rewrite of ``python`` from the Ollama model, echo it, then save it.

    Uses the same system/user messages as the OpenAI path; the full reply is
    passed to ``write_output`` once streaming finishes.
    """
    conversation = messages_for(python)

    pieces = []
    for event in ollama.chat(model=OLLAMA_MODEL, messages=conversation, stream=True):
        text = event['message']['content']
        pieces.append(text)
        print(text, end='', flush=True)
    write_output("".join(pieces))

### Compile C++ Code   

#### MAC 
!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp

#### Windows 
Visual Studio: open Tools > Command Line > Developer Command Prompt, then run `cl -o cppexe cppfile.cpp`.  
MSYS2 (MinGW-w64): install MSYS2, add `C:\msys64\ucrt64\bin` to the PATH, then run `g++ -o cppexe cppfile.cpp`.  


In [14]:
# Benchmark program kept as a string so it can be exec'd and sent to the models.
# It runs calculate(100_000_000, 4, 1), multiplies the result by 4, and prints
# the value plus the elapsed wall-clock time.
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [15]:
# Python 
exec(pi)

Result: 3.141592658589
Execution Time: 18.957282 seconds


### GPT

In [16]:
# Convert to C++ with GPT 
optimize_gpt(pi)

```cpp
#include <iostream>
#include <iomanip>
#include <chrono>

double calculate(long long iterations, double param1, double param2) {
    double result = 1.0;
    for (long long i = 1; i <= iterations; ++i) {
        double j1 = i * param1 - param2;
        result -= (1.0 / j1);
        double j2 = i * param1 + param2;
        result += (1.0 / j2);
    }
    return result;
}

int main() {
    auto start_time = std::chrono::high_resolution_clock::now();
    double result = calculate(100000000, 4.0, 1.0) * 4.0;
    auto end_time = std::chrono::high_resolution_clock::now();

    std::chrono::duration<double> execution_time = end_time - start_time;

    std::cout << std::fixed << std::setprecision(12);
    std::cout << "Result: " << result << std::endl;
    std::cout << "Execution Time: " << execution_time.count() << " seconds" << std::endl;

    return 0;
}
```

In [17]:
!g++ -o optimized_gpt optimized.cpp

In [18]:
!.\optimized_gpt.exe

Result: 3.141592658589
Execution Time: 0.338282000000 seconds


### Claude

In [19]:
# Convert to C++ with Claude  
optimize_claude(pi)

#include <iostream>
ip>clude <ioman
ono>lude <chr

 param1, int param2) {long iterations, int
double result = 1.0;
#pragma omp parallel for reduction(-:result)
 = 1; i <= iterations; ++i) {
_cast<double>(param1) - param2;
 / j;   result -= 1.0
 static_cast<double>(param1) + param2;
0 / j;  result += 1.
    }
return result;
}

int main() {
ono::high_resolution_clock::now();
    
(100000000LL, 4, 1) * 4;ulate
    
::high_resolution_clock::now();
<std::chrono::microseconds>(end_time - start_time);
    
d::setprecision(12);::fixed << st
 << result << std::endl;: "
d::cout << "Execution Time: " << duration.count() / 1e6 << " seconds" << std::endl;
    
    return 0;
}

The code above was generated by Claude and then fixed using the Anthropic Console, which responded:  
"It seems like the code has some syntax errors and missing parts. Here's the corrected version:"  

In [None]:
# Saved copy of the "corrected" C++ returned for Claude's translation of `pi`.
# NOTE(review): this does not implement the same series as the Python in `pi`.
# The loop divides 1.0 / (param1 - param2) by i, whereas the Python computes
# 1 / (i * param1 - param2); the "+" term differs in the same way.
# The loop counter is an `int` while `iterations` is a `long`.
# The variable is not referenced anywhere else in the visible notebook.
claude_pi_code =""" 
#include <iostream>
#include <iomanip>
#include <chrono>

double calculate(long iterations, int param1, int param2) {
    double result = 1.0;
    #pragma omp parallel for reduction(-:result)
    for (int i = 1; i <= iterations; ++i) {
        double j = i;
        result -= 1.0 / (static_cast<double>(param1) - param2) / j;
        result += 1.0 / (static_cast<double>(param1) + param2) / j;
    }
    return result;
}

int main() {
    auto start_time = std::chrono::high_resolution_clock::now();
    
    double result = calculate(100000000LL, 4, 1) * 4;
    
    auto end_time = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    
    std::cout << std::fixed << std::setprecision(12);
    std::cout << "Result: " << result << std::endl;
    std::cout << "Execution Time: " << duration.count() / 1e6 << " seconds" << std::endl;
    
    return 0;
}
"""

In [20]:
!g++ -o optimized_claude optimized.cpp

In [22]:
!.\optimized_claude.exe

Result: -6.132211420723
Execution Time: 0.342027000000 seconds


> **Error: incorrect result** 

### Gemini

In [25]:
# Convert python to cpp with Gemini
optimize_google(pi)

```cpp
include <iostream>
#include <iomanip>
#include <chrono>

double calculate(long long iterations, long long param1, long long param2) {
    double result = 1.0;
    for (long long i = 1; i <= iterations; ++i) {
        double j = static_cast<double>(i * param1) - param2;
        result -= (1.0 / j);
 = static_cast<double>(i * param1) + param2;
        result += (1.0 / j);
    }
    return result;
}

int main() {
 std::chrono::high_resolution_clock::now();
    double result = calculate(100000000LL, 4LL, 1LL) * 4.0;
 std::chrono::high_resolution_clock::now();

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    double elapsed_seconds = duration.count() / 1000000.0;

 << std::fixed << std::setprecision(12) << "Result: " << result << std::endl;
    std::cout << std::fixed << std::setprecision(6) << "Execution Time: " << elapsed_seconds << " seconds" << std::endl;

0;  return 
}
```

> Even though the output above appears to contain syntax errors, the saved file did not have any. This appears to be a display issue in Jupyter; see the clean copy below.
>
> The same may be true of the Claude output.
> 

In [26]:
# Clean copy of Gemini's C++ translation of `pi`, kept for reference because
# the streamed cell output above is displayed garbled.
# The variable is not referenced anywhere else in the visible notebook.
gemini_code= """ 
#include <iostream>
#include <iomanip>
#include <chrono>

double calculate(long long iterations, long long param1, long long param2) {
    double result = 1.0;
    for (long long i = 1; i <= iterations; ++i) {
        double j = static_cast<double>(i * param1) - param2;
        result -= (1.0 / j);
        j = static_cast<double>(i * param1) + param2;
        result += (1.0 / j);
    }
    return result;
}

int main() {
    auto start_time = std::chrono::high_resolution_clock::now();
    double result = calculate(100000000LL, 4LL, 1LL) * 4.0;
    auto end_time = std::chrono::high_resolution_clock::now();

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    double elapsed_seconds = duration.count() / 1000000.0;

    std::cout << std::fixed << std::setprecision(12) << "Result: " << result << std::endl;
    std::cout << std::fixed << std::setprecision(6) << "Execution Time: " << elapsed_seconds << " seconds" << std::endl;

    return 0;
}

"""

In [27]:
!g++ -o optimized_gemini optimized.cpp

In [28]:
!.\optimized_gemini.exe

Result: 3.141592658589
Execution Time: 0.338521 seconds


### Ollama

In [29]:
optimize_ollama(pi)

```cpp
#include <iostream>
#include <chrono>

using namespace std;

// Function to calculate the result using a custom loop unrolling optimization
long double calculate(int iterations, long double param1, long double param2) {
    // Initialize result and accumulate constant terms in j1 and j2
    long double result = 0;
    long double c1 = 0.25 * param1 - param2;
    long double c2 = 0.25 * param1 + param2;

    for (int i = 1; i <= iterations; ++i) {
        // Calculate j1 and j2
        long double j1 = i * param1 - param2;
        long double j2 = i * param1 + param2;

        // Accumulate result with constant terms of j1 and j2
        result += (c1 / j1);
        result -= (c1 / j2);

        // Accumulate result with constant terms of j1 and j2 in reverse order
        result += (c2 / j1);
        result -= (c2 / j2);
    }

    return 4 * result;
}

int main() {
    int iterations = 100_000_000;

    auto start_time = chrono::high_resolution_clock::now();
    long double res

The generated code contained some errors, which were fixed as follows:  
1. `int iterations = 100_000_000;` changed to `int iterations = 100000000;`
2. `1e6.0` changed to `1e6`
3. Added `#include <iomanip>`, which `setprecision(12)` requires

In [30]:
!g++ -o optimized_ollama optimized.cpp

In [31]:
!.\optimized_ollama.exe

Result: 1.716814682820
Execution Time: 1.257757 seconds


> **Llama3.2:3b Result is incorrect**

In [32]:
# Harder benchmark program kept as a string: a linear congruential generator
# feeds a brute-force maximum-subarray-sum search (nested loops over n = 10000
# values), repeated for 20 seeds, with the total and elapsed time printed.
# Its functions call one another, so it must run in a single shared namespace.
python_hard = """# Be careful to support large number sizes

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value
        
def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""

# Stream

In [33]:
def stream_gpt(python):
    """Yield the OpenAI model's C++ reply for ``python`` as it grows.

    After every streamed chunk, yields the whole reply so far with the
    opening "```cpp" line marker and any "```" fence removed.
    """
    response = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    so_far = ""
    for event in response:
        # A delta may carry no content; treat that as an empty fragment.
        so_far += event.choices[0].delta.content or ""
        yield so_far.replace('```cpp\n', '').replace('```', '')

In [34]:
def stream_claude(python):
    """Yield the Claude model's C++ reply for ``python`` as it grows.

    The reply is capped at 2000 tokens. After each text fragment, yields the
    whole reply so far with the "```cpp" line marker and "```" fences removed.
    """
    user_turn = {'role': "user", 'content': user_prompt_for(python)}
    request = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[user_turn],
    )
    so_far = ""
    with request as events:
        for piece in events.text_stream:
            so_far += piece
            yield so_far.replace('```cpp\n', '').replace('```', '')

In [35]:
def stream_google(python):
    """Yield the Gemini model's C++ reply for ``python`` as it grows.

    After each chunk that carries text, yields the whole reply so far with the
    opening "```cpp" line marker and any "```" fence removed.
    """
    # Use the shared GOOGLE_MODEL constant (as optimize_google does) so the
    # model is configured in one place rather than hard-coded here.
    gemini = google.generativeai.GenerativeModel(model_name=GOOGLE_MODEL, system_instruction=system_message)
    response = gemini.generate_content(user_prompt_for(python), stream=True)
    reply = ""

    for chunk in response:
        if chunk.text:
            reply += chunk.text
            yield reply.replace('```cpp\n', '').replace('```', '')


In [36]:
def stream_ollama(python):
    """Yield the Ollama model's C++ reply for ``python`` as it grows.

    After each chunk, yields the whole reply so far with the opening "```cpp"
    line marker and any "```" fence removed. ``None`` chunks are skipped.
    """
    stream = ollama.chat(model=OLLAMA_MODEL, messages=messages_for(python), stream=True)
    reply = ""
    for chunk in stream:
        # Identity test: `!= None` would dispatch to the chunk type's __ne__.
        if chunk is None:
            continue
        reply += chunk['message']['content']
        yield reply.replace('```cpp\n', '').replace('```', '')

In [41]:
def optimize(python, model):
    """Stream the C++ conversion of ``python`` from the backend named by ``model``.

    ``model`` is one of "GPT", "Claude", "Google" or "Ollama"; any other value
    raises ``ValueError("Unknown model")`` when the generator is first advanced.
    Yields each cumulative reply produced by the selected stream_* generator.
    """
    # Lambdas defer the stream_* call until the matching label is found.
    backends = (
        ("GPT", lambda: stream_gpt(python)),
        ("Claude", lambda: stream_claude(python)),
        ("Google", lambda: stream_google(python)),
        ("Ollama", lambda: stream_ollama(python)),
    )
    for label, start_stream in backends:
        if model == label:
            yield from start_stream()
            return
    raise ValueError("Unknown model")

In [37]:
def execute_python(code):
    """Run ``code`` with ``exec`` and return everything it printed to stdout.

    The code runs in a fresh namespace whose ``__name__`` is "__main__", so it
    cannot see or modify this module's globals. Any exception raised by the
    code propagates to the caller after stdout has been restored.
    """
    from contextlib import redirect_stdout

    captured = io.StringIO()
    # A single dict serves as both globals and locals. With separate dicts
    # (plain exec(code) inside a function), functions defined by the code
    # cannot see one another and fail with NameError when they call each other.
    namespace = {"__name__": "__main__"}
    # redirect_stdout puts back whatever stream was active before the call;
    # resetting to sys.__stdout__ would disconnect a notebook's output stream.
    with redirect_stdout(captured):
        # NOTE(security): exec runs arbitrary code with this process's
        # privileges; only feed it code you trust.
        exec(code, namespace)
    return captured.getvalue()

In [38]:
def execute_cpp(code):
    """Save ``code`` as optimized.cpp, compile it with g++, run it, and return its stdout.

    Returns the program's standard output on success. If the compiler or the
    built executable cannot be started, or either step exits with a non-zero
    status, returns a string beginning "An error occurred:" with the details.
    """
    write_output(code)
    # Apple-silicon alternative:
    #   clang++ -Ofast -std=c++17 -march=armv8.5-a -mtune=apple-m1 -mcpu=apple-m1 -o optimized optimized.cpp
    # -O3 enables optimisation; without it the timing comparison against
    # Python measures an unoptimised build.
    compile_cmd = ["g++", "-O3", "-o", "optimized", "optimized.cpp"]
    # Build the path with the platform's separator (".\optimized" on Windows).
    run_cmd = [os.path.join(".", "optimized")]
    try:
        subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)
    except FileNotFoundError as e:
        # Raised when g++ (or the freshly built binary) cannot be found.
        return f"An error occurred:\n{e}"
    except subprocess.CalledProcessError as e:
        return f"An error occurred:\n{e.stderr}"
    return run_result.stdout

In [39]:
# Background colours for the two result panes; the class names match the
# elem_classes given to the Python and C++ output boxes in the UI.
css= """ 
.python {background-color: #306998;} 
.cpp {background-color: #050;}
"""

In [42]:
# Gradio front end: Python source on the left, generated C++ on the right,
# a model selector, and buttons to convert and to run each version.
with gr.Blocks(css=css) as ui:
    gr.Markdown("# Convert code from Python to C++")
    with gr.Row():
        python = gr.Textbox(label="Python code:", value=pi, lines=10)
        cpp = gr.Textbox(label="C++ code:", lines=10)
    with gr.Row():
        model = gr.Dropdown(["GPT", "Claude", "Google", "Ollama"], label="Select model", value="GPT")
    with gr.Row():
        convert = gr.Button("Convert Code")
    with gr.Row():
        python_run = gr.Button("Run Python")
        cpp_run = gr.Button("Run C++")
    with gr.Row():
        python_out = gr.TextArea(label="Python result:", elem_classes=["python"])
        # Label previously read "C++ resultL" (typo for the trailing colon).
        cpp_out = gr.TextArea(label="C++ result:", elem_classes=["cpp"])

    # optimize is a generator, so the C++ box is updated with each yielded value.
    convert.click(optimize, inputs=[python, model], outputs=[cpp])
    python_run.click(execute_python, inputs=[python], outputs=[python_out])
    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.


