In [10]:
import subprocess
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import time

# Ensure GCC is installed in Colab
!apt-get install gcc

# File names
file_name = "/content/example.c"
preprocessed_file = "/content/preprocessed.c"

# Dummy training data for ML model.
# Each row is [line_count, loop_count], the same layout that
# extract_features_from_code() returns; each label is a GCC -O flag.
X_train = np.array([[5, 2], [50, 5], [100, 10], [200, 20], [300, 30]])
y_train = np.array(['-O1', '-O1', '-O1', '-O2', '-O3'])

# Train simple ML model.
# A fixed random_state makes the forest (and therefore the predicted flag)
# reproducible from run to run; without it the result on such a tiny
# training set can change between executions.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

def extract_features_from_code(code):
    """Extract features from C code: number of lines and loops.

    Args:
        code: C source text as a single string.

    Returns:
        A two-element list ``[lines, loops]``. ``lines`` is the number of
        newline-separated segments (an empty string counts as one line).
        ``loops`` is the number of whole-word occurrences of ``for`` and
        ``while``.
    """
    import re  # local import so the function does not rely on file-level imports

    lines = len(code.split('\n'))
    # Match whole words only: a plain substring count would treat
    # identifiers such as ``format``, ``before`` or ``meanwhile`` as loops.
    # NOTE: keywords inside comments or string literals are still counted.
    loops = len(re.findall(r'\b(?:for|while)\b', code))
    return [lines, loops]

def preprocess_c_code(file_name):
    """Preprocess the C file using GCC.

    Runs ``gcc -E -P`` on ``file_name`` and writes the output to the
    module-level ``preprocessed_file`` path.

    Args:
        file_name: Path of the C source file to preprocess.

    Returns:
        The path of the preprocessed output (``preprocessed_file``).

    Raises:
        subprocess.CalledProcessError: If gcc exits with a non-zero status.
    """
    # An argument list (no shell) passes paths containing spaces or shell
    # metacharacters to gcc verbatim instead of letting a shell re-parse them.
    # check=True fails here rather than letting the caller read a missing or
    # stale output file after a failed preprocessing step.
    subprocess.run(
        ["gcc", "-E", "-P", str(file_name), "-o", str(preprocessed_file)],
        check=True,
    )
    return preprocessed_file

def compile_and_measure(file_name, opt_flag):
    """Compile a C file with the given flag(s) and time one run of the binary.

    Args:
        file_name: Path of the C source file to compile.
        opt_flag: GCC flag string such as ``"-O2"``. Whitespace-separated
            flags are passed as separate arguments.

    Returns:
        Wall-clock seconds (float) spent launching and running the compiled
        executable once. Process start-up time is included.

    Raises:
        subprocess.CalledProcessError: If compilation fails.
    """
    exe_file = "/content/a.out"

    # str() accepts non-plain string types (e.g. a NumPy string returned by a
    # model prediction); split() keeps multi-flag strings working as they did
    # when the command was a single shell line.
    compile_cmd = ["gcc", str(file_name), *str(opt_flag).split(), "-o", exe_file]
    # check=True: never time a stale executable left over from an earlier
    # successful build when this compilation fails.
    subprocess.run(compile_cmd, check=True)

    # Run the binary directly, without an intermediate shell, so shell
    # start-up cost is not part of the measurement.
    start_time = time.perf_counter()
    subprocess.run([exe_file])
    end_time = time.perf_counter()

    return end_time - start_time

def optimize_code(file_name):
    """Predict an optimization flag for a C file and benchmark it against -O0.

    The file is preprocessed, features are extracted from the preprocessed
    text, the module-level ``model`` predicts a flag, and the source is then
    compiled and timed once with ``-O0`` and once with the predicted flag.
    Results are printed; nothing is returned.

    Args:
        file_name: Path of the C source file to analyse and benchmark.
    """
    expanded_path = preprocess_c_code(file_name)
    with open(expanded_path, 'r') as src:
        source_text = src.read()

    feature_row = extract_features_from_code(source_text)
    predicted_opt = model.predict([feature_row])[0]
    print(f"Predicted best optimization flag: {predicted_opt}")

    time_no_opt = compile_and_measure(file_name, "-O0")
    time_opt = compile_and_measure(file_name, predicted_opt)

    # Guard clause: a falsy (zero) timing is treated as a failed measurement
    # and would also make the percentage below divide by zero.
    if not (time_no_opt and time_opt):
        print("Error: Could not measure execution time.")
        return

    # Positive means the predicted flag ran faster than -O0.
    improvement = ((time_no_opt - time_opt) / time_no_opt) * 100
    print(f"Execution Time without Optimization: {time_no_opt:.4f}s")
    print(f"Execution Time with {predicted_opt}: {time_opt:.4f}s")
    print(f"Performance Improvement: {improvement:.2f}%")

# Build a minimal "Hello World" C program and save it to file_name
c_code = "\n".join([
    '#include <stdio.h>',
    'int main() {',
    '    printf("Hello World\\n");',
    '    return 0;',
    '}',
])
with open(file_name, "w") as src_out:
    src_out.write(c_code)

# Predict a flag for the file just written and benchmark it
optimize_code(file_name)


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
gcc is already the newest version (4:11.2.0-1ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.
Predicted best optimization flag: -O3
Execution Time without Optimization: 0.0017s
Execution Time with -O3: 0.0017s
Performance Improvement: -0.14%
