In [None]:
# STEP 1: Import essential libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import re
import joblib
import pickle

# STEP 2: Upload and load the dataset
import io

from google.colab import files
uploaded = files.upload()

# files.upload() returns a dict mapping each uploaded file's ORIGINAL name to its
# raw bytes. Colab stores a re-upload on disk under a de-duplicated name such as
# "synthetic_code_optimization (2).csv", so reading the fixed path would silently
# load an older copy. Parse the freshly uploaded bytes instead.
DATASET_FILENAME = "synthetic_code_optimization.csv"

if uploaded:
    # Prefer the expected file name; otherwise take the first file that was uploaded.
    uploaded_name = DATASET_FILENAME if DATASET_FILENAME in uploaded else next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded (dialog cancelled): fall back to a copy already on disk.
    df = pd.read_csv(DATASET_FILENAME)

# STEP 3: Feature extraction
def extract_features(code_snippet):
    """Count keyword occurrences in a source snippet.

    Returns a three-element list ``[loops, branches, memory_ops]`` where each
    entry is the number of non-overlapping whole-word regex matches for:

    * loops: ``for`` / ``while``
    * branches: ``if`` / ``else if`` / ``else`` / ``switch`` / ``case``
      (``else if`` separated by exactly one space counts as a single match)
    * memory ops: ``malloc`` / ``free`` / ``new`` / ``delete``
    """
    keyword_patterns = (
        r'\b(for|while)\b',
        r'\b(if|else if|else|switch|case)\b',
        r'\b(malloc|free|new|delete)\b',
    )
    # One count per pattern, in the order loops, branches, memory ops.
    return [
        sum(1 for _ in re.finditer(pattern, code_snippet))
        for pattern in keyword_patterns
    ]

# Feature matrix: one [loops, branches, memory ops] row per code snippet
X = np.array(list(map(extract_features, df["Code Snippet"])))

# STEP 4: Label encoding
# Map every "Optimization Level" value to an integer class id
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["Optimization Level"])

# STEP 5: Train-test split and scaling
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# STEP 6: Build and compile the model
# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# are repeatable after "Restart & Run All".
keras.utils.set_random_seed(42)

# Derive layer widths from the data instead of hard-coding them, so the network
# stays valid if features or optimization levels are added to the dataset.
n_features = X_train_scaled.shape[1]
n_classes = len(label_encoder.classes_)

model = keras.Sequential([
    # An explicit Input replaces the `input_shape=` argument on the first Dense
    # layer, which Keras warns against in Sequential models.
    keras.Input(shape=(n_features,)),
    keras.layers.Dense(16, activation='tanh'),
    keras.layers.Dropout(0.48),
    keras.layers.Dense(16, activation='tanh'),
    keras.layers.Dropout(0.33),
    # One softmax unit per encoded optimization level
    keras.layers.Dense(n_classes, activation='softmax')
])

# Integer class ids from LabelEncoder -> sparse categorical cross-entropy
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.00114),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# STEP 7: Train the model
# NOTE: the held-out test split is also used as validation data, so the
# "val_*" metrics below are the only held-out metrics this notebook reports.
history = model.fit(X_train_scaled, y_train,
                    epochs=200,
                    batch_size=32,
                    validation_data=(X_test_scaled, y_test),
                    verbose=1)

# STEP 8: Save the trained model and preprocessors
# These three files are what the inference cell loads back.
model.save("trained_model.h5")
joblib.dump(scaler, "scaler.save")
with open("label_encoder.pkl", "wb") as f:
    f.write(pickle.dumps(label_encoder))

print("✅ Model and preprocessing tools saved successfully!")

# STEP 9: Print final metrics
# Take the last-epoch value of each tracked metric from the training history.
final_train_acc, final_val_acc, final_train_loss, final_val_loss = (
    history.history[metric_name][-1]
    for metric_name in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

print(
    f"\n📈 Final Training Accuracy: {final_train_acc:.4f}",
    f"📈 Final Validation Accuracy: {final_val_acc:.4f}",
    f"📉 Final Training Loss: {final_train_loss:.5f}",
    f"📉 Final Validation Loss: {final_val_loss:.5f}",
    sep="\n",
)


Saving synthetic_code_optimization.csv to synthetic_code_optimization (2).csv
Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.4745 - loss: 1.0217 - val_accuracy: 0.8580 - val_loss: 0.6162
Epoch 2/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6600 - loss: 0.7371 - val_accuracy: 1.0000 - val_loss: 0.4123
Epoch 3/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8114 - loss: 0.5084 - val_accuracy: 1.0000 - val_loss: 0.2830
Epoch 4/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8796 - loss: 0.3816 - val_accuracy: 1.0000 - val_loss: 0.1982
Epoch 5/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9358 - loss: 0.2829 - val_accuracy: 1.0000 - val_loss: 0.1366
Epoch 6/200
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9390 - loss: 0.2453 - val_accuracy: 1.0000 - val_loss: 0.0936
Epoch 7/200
[1m63/63[0m [32m━━━━━━━━━━━━━━



✅ Model and preprocessing tools saved successfully!

📈 Final Training Accuracy: 0.9995
📈 Final Validation Accuracy: 1.0000
📉 Final Training Loss: 0.00162
📉 Final Validation Loss: 0.00001


In [None]:
import tensorflow as tf
import numpy as np
import re
import subprocess
import joblib

# Restore the artifacts written by the training cell:
# the Keras network (HDF5 file) and the fitted feature scaler (joblib dump).
model = tf.keras.models.load_model(filepath='trained_model.h5')
scaler = joblib.load(filename="scaler.save")

# Feature extraction function (same as used during training)
def extract_features(code_snippet):
    """Count keyword occurrences in a source snippet for a single prediction.

    Returns a NumPy array of shape ``(1, 3)`` holding
    ``[[loops, branches, memory_ops]]``, where each entry is the number of
    non-overlapping whole-word regex matches for:

    * loops: ``for`` / ``while``
    * branches: ``if`` / ``else if`` / ``else`` / ``switch`` / ``case``
    * memory ops: ``malloc`` / ``free`` / ``new`` / ``delete``
    """
    keyword_patterns = (
        r'\b(for|while)\b',
        r'\b(if|else if|else|switch|case)\b',
        r'\b(malloc|free|new|delete)\b',
    )
    counts = [
        sum(1 for _ in re.finditer(pattern, code_snippet))
        for pattern in keyword_patterns
    ]
    # Wrap in an outer list so the result is a one-row 2-D batch.
    return np.array([counts])

# Code snippet to analyze (runtime input)
import pickle

# NOTE: reading stops at the FIRST blank line, so paste the snippet without
# empty lines inside it.
print("🔧 Enter your code snippet below (press Enter twice to finish):")
snippet_lines = []
while True:
    try:
        line = input()
    except EOFError:
        # stdin closed (non-interactive run): treat it as the end of the snippet
        break
    if line.strip() == "":
        break
    snippet_lines.append(line)
user_code = "".join(f"{snippet_line}\n" for snippet_line in snippet_lines)

# Extract and scale features
features = extract_features(user_code)
features_scaled = scaler.transform(features)

# Predict the optimization level: a single row of class probabilities, reduced
# to the index of the most likely class (0 .. number_of_model_outputs - 1).
class_probabilities = model.predict(features_scaled)[0]
predicted_index = int(np.argmax(class_probabilities))

# Map prediction to compiler flag
optimization_flags = ["-O0", "-O1", "-O2", "-O3"]

# The training cell encodes the "Optimization Level" column with a LabelEncoder,
# so a class index refers to that encoder's class order, not to a position in
# `optimization_flags`. Decode through the saved encoder when it is available.
# NOTE: only unpickle a label_encoder.pkl produced by the training cell;
# pickle.load executes arbitrary code from untrusted files.
try:
    with open("label_encoder.pkl", "rb") as f:
        label_encoder = pickle.load(f)
except FileNotFoundError:
    label_encoder = None

selected_flag = None
if label_encoder is not None:
    predicted_label = str(label_encoder.inverse_transform([predicted_index])[0]).strip()
    # Accept labels written either as "O2" or "-O2" (assumed dataset format; TODO confirm).
    candidate_flag = predicted_label if predicted_label.startswith("-") else f"-{predicted_label}"
    if candidate_flag in optimization_flags:
        selected_flag = candidate_flag

if selected_flag is None:
    # Encoder missing or label not recognisable as a flag: keep the original
    # positional mapping, but say so instead of doing it silently.
    if not 0 <= predicted_index < len(optimization_flags):
        raise ValueError(
            f"Predicted class index {predicted_index} has no matching optimization flag"
        )
    selected_flag = optimization_flags[predicted_index]
    print(f"⚠️ Could not decode class {predicted_index} via label_encoder.pkl; "
          f"using positional flag {selected_flag}")

# Compilation setup
# NOTE(review): the prediction is based on the snippet typed above, while the
# file that gets compiled is `source_file` on disk; confirm they are meant to match.
source_file = "example.c"          # Must be saved already
optimized_binary = "optimized.out"
# Argument list (no shell): nothing in the command is interpreted by a shell.
compile_command = ["clang", selected_flag, source_file, "-o", optimized_binary]

try:
    compile_result = subprocess.run(compile_command, capture_output=True, text=True)
except FileNotFoundError:
    # Raised when the clang executable itself cannot be found.
    print("\n❌ clang was not found on PATH; nothing was compiled.")
else:
    if compile_result.returncode == 0:
        print(f"\n✅ Compiled {source_file} using predicted optimization flag: {selected_flag}")
    else:
        # Surface the compiler diagnostics instead of reporting a false success.
        print(compile_result.stderr)
        print(f"\n❌ Compilation of {source_file} with {selected_flag} failed "
              f"(exit code {compile_result.returncode})")





🔧 Enter your code snippet below (press Enter twice to finish):
""" int main() {     int a = 5;     int b = 10;     int sum = a + b;     return sum; } """
" "  " 
int main() {
    int a=5;
        int b=10;
   int sum=a+b;
  return sum;  }   " " "

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step

✅ Compiled example.c using predicted optimization flag: -O1
