In [1]:
import pandas as pd

# Load the benchmark results and clean them in one pass:
#   1. discard rows with no 'CPU' or no 'Benchmark' value,
#   2. coerce 'CPU' to numeric (unparseable entries become NaN),
#   3. trim surrounding whitespace from the two label columns,
#   4. discard rows whose 'CPU' could not be parsed,
#   5. discard rows that are exact duplicates of an earlier row.
df = (
    pd.read_csv('cleaned_benchmark_results.csv')
    .dropna(subset=['CPU', 'Benchmark'])
    .assign(
        CPU=lambda frame: pd.to_numeric(frame['CPU'], errors='coerce'),
        Optimization=lambda frame: frame['Optimization'].str.strip(),
        Category=lambda frame: frame['Category'].str.strip(),
    )
    .dropna(subset=['CPU'])
    .drop_duplicates()
)

# Persist the cleaned frame (without the pandas index) for the later cells.
df.to_csv('cleaned_data.csv', index=False)

print("Cleaned data saved to 'cleaned_data.csv'")

Cleaned data saved to 'cleaned_data.csv'


In [2]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,Benchmark,Time,CPU,Iterations,Optimization,Category
0,BM_SimpleLoop,,4540.38,,-O0,Loops
1,BM_SimpleLoop,,4396.42,,-O0,Loops
2,BM_SimpleLoop,,4350.45,,-O0,Loops
3,BM_SimpleLoop,,4360.8,,-O0,Loops
4,BM_SimpleLoop,,4356.94,,-O0,Loops


In [3]:
# Remove the 'Time' column. Rebinding `df` (instead of mutating in place) and
# ignoring an already-missing column keeps this cell safe to re-run: the
# original in-place drop raised KeyError on a second execution.
df = df.drop(columns=['Time'], errors='ignore')

In [4]:
# Preview the first five rows now that 'Time' has been removed.
df.head(n=5)

Unnamed: 0,Benchmark,CPU,Iterations,Optimization,Category
0,BM_SimpleLoop,4540.38,,-O0,Loops
1,BM_SimpleLoop,4396.42,,-O0,Loops
2,BM_SimpleLoop,4350.45,,-O0,Loops
3,BM_SimpleLoop,4360.8,,-O0,Loops
4,BM_SimpleLoop,4356.94,,-O0,Loops


In [5]:
# Remove the 'Iterations' column. Rebinding `df` (instead of mutating in
# place) and ignoring an already-missing column keeps this cell safe to
# re-run: the original in-place drop raised KeyError on a second execution.
df = df.drop(columns=['Iterations'], errors='ignore')

In [6]:
# Preview the first five rows now that 'Iterations' has been removed.
df.head(n=5)

Unnamed: 0,Benchmark,CPU,Optimization,Category
0,BM_SimpleLoop,4540.38,-O0,Loops
1,BM_SimpleLoop,4396.42,-O0,Loops
2,BM_SimpleLoop,4350.45,-O0,Loops
3,BM_SimpleLoop,4360.8,-O0,Loops
4,BM_SimpleLoop,4356.94,-O0,Loops


In [7]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib

# One seed shared by the split and the forest so that a fresh
# "Restart & Run All" produces the same saved model every time.
SEED = 42

df = pd.read_csv('cleaned_data.csv')

# Encode the two label columns as integer category codes.
# NOTE(review): the codes follow pandas' inferred category order and a missing
# label is encoded as -1; anything that feeds this model at prediction time
# must use the same integer mapping -- verify against the consumer.
df['Optimization'] = df['Optimization'].astype('category').cat.codes
df['Category'] = df['Category'].astype('category').cat.codes

# Features: the encoded labels. Target: the measured CPU value.
X = df[['Optimization', 'Category']]
y = df['CPU']

# Hold out 20% of the rows; the held-out part is not used further in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

model = RandomForestRegressor(random_state=SEED)
model.fit(X_train, y_train)

# Persist the fitted model; the returned file list is the cell's output.
joblib.dump(model, 'optimizer_model.pkl')

['optimizer_model.pkl']

In [8]:
import tkinter as tk
from tkinter import messagebox
import joblib
import pandas as pd
import re

# Try to restore the trained model; fall back to None (and report why) when
# the file cannot be loaded, so the GUI can still start.
# joblib artifacts are pickle-based: only load files you produced yourself.
model = None
try:
    model = joblib.load('optimizer_model.pkl')
except Exception as load_error:
    print("Model load failed:", load_error)

def analyze_code(code):
    """Derive the two coarse model features from pasted text.

    Args:
        code: The text pasted by the user.

    Returns:
        A ``(opt_level, category)`` tuple of ints. ``opt_level`` is the
        highest of 3, 2, 1 whose ``-O<n>`` flag occurs in ``code`` (0 when
        none does). ``category`` is 1 when ``for`` or ``while`` occurs as a
        standalone word, otherwise 0.
    """
    # Whole-word match: a plain substring test also fired on identifiers such
    # as "format", "before" or "information" and misclassified them as loops.
    has_loop = re.search(r"\b(?:for|while)\b", code) is not None
    # NOTE(review): 1 = loops / 0 = default are hard-coded here; presumably
    # they should match the integer codes the model was trained with -- confirm.
    category = 1 if has_loop else 0

    # Highest flag wins when several are present, as in the original chain.
    opt_level = next((level for level in (3, 2, 1) if f"-O{level}" in code), 0)

    return opt_level, category

def _suggest_optimization(predicted_time):
    """Return the advice text for a predicted CPU time (None = no prediction)."""
    if predicted_time is None:
        return "Model failed to predict. Consider using -O2 or -O3 manually."
    if predicted_time > 4500:
        return "Use -O3 for high optimization."
    if predicted_time > 3000:
        return "Use -O2 for moderate optimization."
    return "Use -O1 or keep current settings."


def optimize_code():
    """Handle the "Optimize" button: predict a CPU time and show advice.

    Reads the text widget, warns and returns early when it is empty,
    otherwise derives features with ``analyze_code``, asks the loaded model
    for a prediction and shows the estimate plus a suggested flag in a
    message box. When no model is loaded or prediction raises, the dialog
    reports the estimate as unavailable instead of showing a fake number.

    Returns:
        None.
    """
    code = code_input.get("1.0", tk.END).strip()

    if not code:
        messagebox.showwarning("Empty Input", "Please paste your code.")
        return

    opt_level, category = analyze_code(code)
    input_data = pd.DataFrame([[opt_level, category]], columns=['Optimization', 'Category'])

    # None (not -1) marks "no prediction", so a sentinel can never be
    # formatted and displayed as if it were a real estimate.
    predicted_time = None
    # Explicit None check: truthiness of an estimator object is not a
    # reliable "was it loaded" test.
    if model is not None:
        try:
            predicted_time = model.predict(input_data)[0]
        except Exception as e:
            # Best effort: report on stdout and fall through to the
            # "model failed" advice.
            print("Prediction error:", e)

    suggestion = _suggest_optimization(predicted_time)

    # NOTE(review): the "ms" label is carried over unchanged; confirm it
    # matches the unit of the CPU column the model was trained on.
    if predicted_time is None:
        estimate = "unavailable"
    else:
        estimate = f"{predicted_time:.2f} ms"

    result = f"Estimated CPU time: {estimate}\nSuggested optimization: {suggestion}"
    messagebox.showinfo("Optimization Result", result)

# Build the window: a prompt label, a text box for the pasted code, and a
# button that runs optimize_code.
root = tk.Tk()
root.title("Code Optimizer")

tk.Label(master=root, text="Paste your code:").pack()

code_input = tk.Text(master=root, width=60, height=10)
code_input.pack()

tk.Button(master=root, text="Optimize", command=optimize_code).pack(pady=5)

# Hand control to Tk's event loop; this call returns once the window closes.
root.mainloop()