<a href="https://colab.research.google.com/github/Soroushav/llm_basics/blob/main/python_to_cpp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess

In [2]:
# Colab-specific setup: the API key is fetched through google.colab's `userdata`
# helper under the name 'OPENAI_API_KEY' instead of being hardcoded in the notebook.
from google.colab import userdata
openai_api_key = userdata.get('OPENAI_API_KEY')
# Shared client used by optimize_for() and stream_gpt().
openai = OpenAI(api_key=openai_api_key)
# Model name passed to every chat-completions request made through `openai`.
MODEL_GPT = "gpt-5-nano"

In [3]:
# System prompt shared by every backend; assembled from adjacent literals so the
# three instructions read as one paragraph.
system_message = (
    "You are an assistant that reimplements Python code in high performance C++ for an M4 Macbook air. "
    "Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. "
    "The C++ response needs to produce an identical output in the fastest possible time."
)

In [4]:
def get_prompt_for(python):
  """Build the user prompt asking for a C++ port of `python`.

  Args:
    python: Python source code (str) to be translated.

  Returns:
    The fixed instruction text followed by a blank line and the source code.
  """
  instructions = (
      "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. ",
      "Respond only with C++ code; do not explain your work other than a few comments. ",
      "Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\n\n",
  )
  return "".join(instructions) + python


In [5]:
def message_for(python):
  """Return the chat message list (system turn, then user turn) for `python`.

  Args:
    python: Python source code (str) to be translated.

  Returns:
    A two-element list of {"role", "content"} dicts: the shared system
    message followed by the user prompt built by get_prompt_for().
  """
  system_turn = dict(role="system", content=system_message)
  user_turn = dict(role="user", content=get_prompt_for(python))
  return [system_turn, user_turn]

In [6]:
def write_output(cpp):
  """Write generated C++ source to `output.cpp` in the working directory.

  The file is overwritten on every call. UTF-8 is requested explicitly so that
  non-ASCII characters in model output (e.g. in comments) are written the same
  way regardless of the platform's default locale encoding.

  Args:
    cpp: The C++ source text to save.
  """
  with open("output.cpp", "w", encoding="utf-8") as f:
    f.write(cpp)

In [7]:
def _strip_code_fences(text):
  """Return `text` with Markdown code-fence lines (``` / ```cpp / ```c++ ...) removed."""
  kept_lines = [
      line for line in text.splitlines(keepends=True)
      if not line.lstrip().startswith("```")
  ]
  return "".join(kept_lines)


def optimize_for(python):
  """Stream a C++ translation of `python` from the GPT model and save it.

  Each streamed fragment is echoed to stdout as it arrives. Once the stream
  ends, the accumulated reply is written via write_output() with Markdown
  fence lines stripped.

  Only whole fence lines are dropped. The previous chain of str.replace calls
  removed "```" first (so "```cpp" never matched) and then deleted every
  occurrence of "cpp", which also mangled identifiers, comments and strings
  inside the generated code.

  Args:
    python: Python source code (str) to be translated.
  """
  stream = openai.chat.completions.create(model=MODEL_GPT, messages=message_for(python), stream=True)
  result = ""
  for chunk in stream:
    # delta.content can be None on some chunks; treat that as an empty fragment.
    content = chunk.choices[0].delta.content or ""
    result += content
    print(content, end='', flush=True)
  write_output(_strip_code_fences(result))

In [8]:
# Benchmark #1, kept as a source string so the same text can be exec'd in the
# kernel for a baseline and sent to a model for translation. It evaluates an
# alternating series over 100,000,000 iterations and prints the result to 12
# decimals plus the elapsed wall-clock time.
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [None]:
# Run the Python baseline in this kernel to capture its reference output and timing.
exec(pi)

Result: 3.141592658589
Execution Time: 17.917721 seconds


In [None]:
# Stream a C++ translation of the first benchmark; optimize_for() echoes it and saves it via write_output().
optimize_for(pi)

#include <iostream>
#include <iomanip>
#include <chrono>

int main() {
    const long long iterations = 100000000LL;
    const double param1 = 4.0;
    const double param2 = 1.0;

    double result = 1.0;

    auto t0 = std::chrono::high_resolution_clock::now();

    // Use incremental product to avoid repeated multiplications
    double prod = param1; // equals i * param1 for i = 1
    for (long long i = 1; i <= iterations; ++i) {
        double j = prod - param2;
        result -= 1.0 / j;

        j = prod + param2;
        result += 1.0 / j;

        prod += param1; // prepare for next i
    }

    result *= 4.0;

    auto t1 = std::chrono::high_resolution_clock::now();
    std::cout.setf(std::ios::fixed);
    std::cout << std::setprecision(12);
    std::cout << "Result: " << result << "\n";
    std::cout << "Execution Time: "
              << std::setprecision(6)
              << std::chrono::duration_cast<std::chrono::duration<double>>(t1 - t0).count()
              << " seconds\

In [9]:
# Benchmark #2, also kept as a source string. It generates numbers with a
# linear congruential generator (modulus 2**32) and sums, over 20 seeds, the
# maximum subarray sum found by a brute-force nested loop over n = 10000 values.
python_hard = """# Be careful to support large number sizes

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value

def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""

In [None]:
# Run the second Python baseline in this kernel to capture its reference output and timing.
exec(python_hard)

Total Maximum Subarray Sum (20 runs): 10980
Execution Time: 68.234475 seconds


In [None]:
# Stream a C++ translation of the second benchmark; optimize_for() echoes it and saves it via write_output().
optimize_for(python_hard)

#include <iostream>
#include <iomanip>
#include <chrono>
#include <cstdint>
#include <climits>

int main() {
    const int n = 10000;
    const uint32_t initial_seed = 42;
    const int min_val = -10;
    const int max_val = 10;
    const uint64_t A = 1664525ULL;
    const uint64_t C = 1013904223ULL;

    // Kadane-based computation over on-the-fly generated sequence
    auto max_subarray_sum_with_seed = [&](uint32_t seed) -> long long {
        long long max_sum = LLONG_MIN;
        long long current = 0;
        uint32_t value = seed;
        unsigned long long range = (unsigned long long)(static_cast<long long>(max_val) - static_cast<long long>(min_val) + 1LL);

        for (int i = 0; i < n; ++i) {
            uint64_t prod = A * static_cast<uint64_t>(value) + C;
            value = static_cast<uint32_t>(prod & 0xFFFFFFFFULL);
            long long rnd = static_cast<long long>(min_val) + static_cast<long long>(value % range);
            current = rnd > current + rnd ? rnd : curren

In [10]:
def stream_gpt(python):
  """Yield the GPT model's reply to `python` as it grows.

  Args:
    python: Python source code (str) to be translated.

  Yields:
    The full text received so far, once per streamed chunk (including chunks
    that carry no new text).
  """
  pieces = []
  response = openai.chat.completions.create(model=MODEL_GPT, messages=message_for(python), stream=True)
  for event in response:
    delta_text = event.choices[0].delta.content
    if delta_text:
      pieces.append(delta_text)
    yield "".join(pieces)

In [None]:
# First Gradio UI: a Python input box (pre-filled with the `pi` benchmark) and a
# C++ output box in one row, with a Convert button in the row below.
with gr.Blocks() as ui:
  gr.Markdown("Your code generator from python to c++")
  with gr.Row():
    python = gr.Textbox(label="Python input:", value=pi, lines=10)
    cpp = gr.Textbox(label="CPP output:", lines=10)
  with gr.Row():
    convert = gr.Button("Convert")
  # stream_gpt is a generator; each string it yields is sent to the output box.
  convert.click(fn=stream_gpt, inputs=[python], outputs=[cpp])
ui.launch()

In [11]:
# Base URL of the Ollama server. The default is the tunnel address this notebook
# was written against; set the OLLAMA_BASE_URL environment variable to point at
# a different host without editing the notebook.
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "https://charmlessly-unbenumbed-matthew.ngrok-free.dev")
# Reuse the OpenAI client against the server's /v1 path. The api_key value is
# presumably a placeholder, since the client requires one — confirm against the server setup.
ollama = OpenAI(base_url=f"{OLLAMA_BASE_URL}/v1", api_key="ollama")

In [12]:
def stream_ollama(python):
  """Yield the Ollama-hosted llama3.2 reply to `python` as it grows.

  Args:
    python: Python source code (str) to be translated.

  Yields:
    The full text received so far, once per streamed chunk (including chunks
    that carry no new text).
  """
  chunks = ollama.chat.completions.create(model="llama3.2:latest", messages=message_for(python), stream=True)
  so_far = ""
  for part in chunks:
    text = part.choices[0].delta.content
    if text:
      so_far = so_far + text
    yield so_far


In [13]:
def generate(python, model):
  """Stream a C++ translation of `python` from the selected backend.

  Args:
    python: Python source code (str) to be translated.
    model: Backend selector, either "GPT" or "OLLAMA".

  Yields:
    The accumulated reply text, as yielded by stream_gpt() or stream_ollama().

  Raises:
    ValueError: If `model` is not one of the supported selectors. Previously
      this case fell through both branches and failed with UnboundLocalError.
  """
  if model == "GPT":
    stream = stream_gpt(python)
  elif model == "OLLAMA":
    stream = stream_ollama(python)
  else:
    raise ValueError(f"Unknown model {model!r}; expected 'GPT' or 'OLLAMA'")
  yield from stream

In [None]:
# Second Gradio UI: same layout as the GPT-only version, plus a dropdown that
# selects which backend generate() dispatches to. Rebinds `ui`, `python`, `cpp`
# and `convert` from the earlier UI cell.
with gr.Blocks() as ui:
  gr.Markdown("Your code generator from python to c++")
  with gr.Row():
    python = gr.Textbox(label="Python input:", value=pi, lines=10)
    cpp = gr.Textbox(label="CPP output:", lines=10)
  with gr.Row():
    # NOTE(review): the module-level name `model` is rebound to a transformers
    # model in a later cell; consider a more specific name for this dropdown.
    model = gr.Dropdown(label="Model", choices=["GPT", "OLLAMA"], value="GPT")
    convert = gr.Button("Convert")
  # The dropdown's current value is passed as generate()'s second argument.
  convert.click(fn=generate, inputs=[python, model], outputs=[cpp])
ui.launch()

In [14]:
from huggingface_hub import login
from google.colab import userdata

In [15]:
# Authenticate with the Hugging Face Hub using the value stored under 'HF_TOKEN'
# in google.colab's `userdata`, so the token never appears in the notebook.
hf_token = userdata.get('HF_TOKEN')
# NOTE(review): add_to_git_credential=True presumably also stores the token for
# git operations — confirm against the huggingface_hub login() documentation.
login(token=hf_token, add_to_git_credential=True)

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and weights for Phind/Phind-CodeLlama-34B-v2 from the Hub.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt followed
# by a disk-usage listing, which suggests the download was aborted — confirm the
# runtime has enough disk and memory for this checkpoint before re-running.
tokenizer = AutoTokenizer.from_pretrained("Phind/Phind-CodeLlama-34B-v2")
model = AutoModelForCausalLM.from_pretrained("Phind/Phind-CodeLlama-34B-v2")

KeyboardInterrupt: 

2.1G	/usr/local/lib/python3.12/dist-packages/libcugraph/lib64/libcugraph.so
1.3G	/usr/local/lib/python3.12/dist-packages/libcuvs/lib64/libcuvs.so
1.1G	/usr/local/lib/python3.12/dist-packages/tensorflow/libtensorflow_cc.so.2
930M	/usr/local/lib/python3.12/dist-packages/torch/lib/libtorch_cuda.so
839M	/usr/local/lib/python3.12/dist-packages/libcudf/lib64/libcudf.so
685M	/usr/local/cuda-12.5/targets/x86_64-linux/lib/libcublasLt_static.a
640M	/usr/lib/x86_64-linux-gnu/libmkl_core.a
634M	/root/.julia/artifacts/07a1d6a5d855689372d59e5bf865f5a63cb9e14e/lib/libReactantExtra.so
523M	/usr/local/lib/python3.12/dist-packages/nvidia/cudnn/lib/libcudnn_engines_precompiled.so.9
469M	/usr/local/lib/python3.12/dist-packages/nvidia/cublas/lib/libcublasLt.so.12
432M	/usr/local/lib/python3.12/dist-packages/nvidia/cusparselt/lib/libcusparseLt.so.0
431M	/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.2.1
427M	/usr/local/cuda-12.5/targets/x86_64-linux/lib/libcublasLt.so.12.5.3.2
411M	/usr/local/l

In [2]:
# Delete the local Hugging Face cache directory (presumably to reclaim disk
# space after the interrupted model download — confirm). This is irreversible:
# anything cached there must be downloaded again.
!rm -rf ~/.cache/huggingface

In [None]:
def stream_phind(python, max_new_tokens=400):
  """Yield the locally loaded model's reply to `python` as it grows.

  Replaces a cell that could not run: it used `yield` outside a function,
  called `.to("cuda")` on the string returned by apply_chat_template with
  tokenize=False, and passed `stream=True` / `details=True` to model.generate
  while reading `r.token.text`, none of which belong to the transformers
  generate() API. Streaming is done with TextIteratorStreamer instead, so this
  generator has the same shape as stream_gpt() and stream_ollama().

  Relies on the module-level `tokenizer` and `model` from the loading cell.

  Args:
    python: Python source code (str) to be translated.
    max_new_tokens: Upper bound on generated tokens (400, as in the original cell).

  Yields:
    The full generated text so far, once per decoded fragment.
  """
  from threading import Thread
  from transformers import TextIteratorStreamer

  # Tokenize the chat-formatted prompt and move the tensors to the model's device.
  # NOTE(review): assumes this tokenizer ships a chat template — confirm for the
  # Phind checkpoint.
  inputs = tokenizer.apply_chat_template(
      message_for(python),
      add_generation_prompt=True,
      return_tensors="pt",
      return_dict=True,
  ).to(model.device)

  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
  generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
  # generate() blocks until it finishes, so it runs in a worker thread while
  # this generator consumes decoded text from the streamer.
  worker = Thread(target=model.generate, kwargs=generation_kwargs)
  worker.start()

  result = ""
  for new_text in streamer:
    result += new_text
    yield result
  worker.join()