<a href="https://colab.research.google.com/github/Eswarchinthakayala-webdesign/SPIC4A61/blob/main/llvm_vs_rl_acc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install stable-baselines3[extra] gymnasium shimmy

import subprocess
import time
import gym
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from gym import spaces
import os


class LLVMEnv(gym.Env):
    def __init__(self):
        super(LLVMEnv, self).__init__()

        # 5 Optimization Levels: None, -O1, -O2, -O3, -Os
        self.action_space = spaces.Discrete(5)

        # State: [code_size, branch_count, loop_count]
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)
        self.current_state = None

    def generate_random_code(self):
        """
        Dynamically generates a random C program
        """
        code = f"""
        #include<stdio.h>
        int main() {{
            int i, sum=0;
            for(i=0; i<{np.random.randint(100, 500)}; i++) {{
                sum += i;
            }}
            printf("Sum: %d", sum);
            return 0;
        }}
        """
        with open('test.c', 'w') as f:
            f.write(code)

    def get_code_metrics(self):
        """
        Extracts branch count, loop count, and code size from AST
        """
        result = subprocess.run(['clang', '-Xclang', '-ast-dump', 'test.c'],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output = result.stdout.decode('utf-8')
        code_size = len(output)
        branch_count = output.count("IfStmt")
        loop_count = output.count("ForStmt") + output.count("WhileStmt")
        return code_size, branch_count, loop_count

    def reset(self):
        # Generate new code every time
        self.generate_random_code()
        code_size, branch_count, loop_count = self.get_code_metrics()
        self.current_state = np.array([code_size/10000, branch_count/10, loop_count/10])
        return self.current_state

    def step(self, action):
        opt_flags = {0: '', 1: '-O1', 2: '-O2', 3: '-O3', 4: '-Os'}
        flag = opt_flags[action]

        # Compile the code
        subprocess.run(['clang', flag, 'test.c', '-o', 'test_opt'],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Execute and measure time
        start = time.time()
        subprocess.run(['./test_opt'], stdout=subprocess.PIPE)
        end = time.time()

        execution_time = end - start
        reward = -execution_time
        done = True
        info = {'execution_time': execution_time}

        return self.current_state, reward, done, info


env = LLVMEnv()

# Train RL Model
model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.0003, n_steps=2048)
model.learn(total_timesteps=50000)
model.save("rl_optimization_model")

# Test with 20 Samples
llvm_results = []
rl_results = []



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.00137 |
| time/              |          |
|    fps             | 5        |
|    iterations      | 1        |
|    time_elapsed    | 391      |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | -0.00136    |
| time/                   |             |
|    fps                  | 5           |
|    iterations           | 2           |
|    time_elapsed         | 773         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012354421 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | -0.00225    |
|    learning_rate        | 0.

TypeError: unhashable type: 'numpy.ndarray'

In [7]:
for i in range(20):
    # Test LLVM -O3 Optimization
    env.generate_random_code()
    subprocess.run(['clang', '-O3', 'test.c', '-o', 'test_opt'], stdout=subprocess.PIPE)
    start = time.time()
    subprocess.run(['./test_opt'], stdout=subprocess.PIPE)
    end = time.time()
    llvm_time = end - start
    llvm_results.append(llvm_time)

    # Test RL Optimization
    obs = env.reset()
    action, _ = model.predict(obs, deterministic=True)
    action = int(action)  # ✅ Correct way to convert to integer without indexing
    opt_flags = {0: '', 1: '-O1', 2: '-O2', 3: '-O3', 4: '-Os'}
    subprocess.run(['clang', opt_flags[action], 'test.c', '-o', 'test_rl'], stdout=subprocess.PIPE)
    start = time.time()
    subprocess.run(['./test_rl'], stdout=subprocess.PIPE)
    end = time.time()
    rl_time = end - start
    rl_results.append(rl_time)


In [10]:
# ✅ Ensure Equal Length by Padding Missing Values
max_length = max(len(llvm_results), len(rl_results))

# Pad shorter lists with NaN values
llvm_results += [np.nan] * (max_length - len(llvm_results))
rl_results += [np.nan] * (max_length - len(rl_results))

# ✅ Save Accuracy to CSV
accuracy_results = pd.DataFrame({
    'Test Case': range(1, max_length+1),
    'LLVM Execution Time (s)': llvm_results,
    'RL Execution Time (s)': rl_results
})

# ✅ Fill missing NaN values with the average of each column
accuracy_results['LLVM Execution Time (s)'].fillna(accuracy_results['LLVM Execution Time (s)'].mean(), inplace=True)
accuracy_results['RL Execution Time (s)'].fillna(accuracy_results['RL Execution Time (s)'].mean(), inplace=True)

# ✅ Calculate Accuracy
min_time = min(min(llvm_results), min(rl_results))
max_time = max(max(llvm_results), max(rl_results))

llvm_accuracy = [100 - ((x - min_time) / max_time) * 100 for x in llvm_results]
rl_accuracy = [100 - ((x - min_time) / max_time) * 100 for x in rl_results]

# ✅ Add Accuracy to DataFrame
accuracy_results['LLVM Accuracy (%)'] = llvm_accuracy
accuracy_results['RL Accuracy (%)'] = rl_accuracy

# ✅ Save Results
accuracy_results.to_csv('optimization_accuracies_filled.csv', index=False)

print("✅ Missing Values Filled with Average")
print("✅ Results saved to 'optimization_accuracies_filled.csv'")


✅ Missing Values Filled with Average
✅ Results saved to 'optimization_accuracies_filled.csv'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  accuracy_results['LLVM Execution Time (s)'].fillna(accuracy_results['LLVM Execution Time (s)'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  accuracy_results['RL Execution Time (s)'].fillna(accuracy_results['RL Execution Time (s)'].mean(), inplace=True)
