In [5]:
import pandas as pd
# Load the cleaned benchmark table. The "Unnamed: 0" column shown by df.head() below is
# presumably a row index that was written into the CSV when it was saved — TODO confirm.
df=pd.read_csv('cleaned_data.csv')

In [6]:
df.head()

Unnamed: 0,Benchmark,Time,CPU,Iterations,Optimization,Category
0,BM_SimpleLoop,,4540.38,,-O0,Loops
1,BM_SimpleLoop,,4396.42,,-O0,Loops
2,BM_SimpleLoop,,4350.45,,-O0,Loops
3,BM_SimpleLoop,,4360.8,,-O0,Loops
4,BM_SimpleLoop,,4356.94,,-O0,Loops


In [7]:
# Drop the 'Time' column (empty in the preview above). Rebinding the result with
# errors='ignore' keeps this cell re-runnable: executing it a second time no longer
# raises KeyError once the column is already gone.
df = df.drop(columns=['Time'], errors='ignore')

In [8]:
df.head()

Unnamed: 0,Benchmark,CPU,Iterations,Optimization,Category
0,BM_SimpleLoop,4540.38,,-O0,Loops
1,BM_SimpleLoop,4396.42,,-O0,Loops
2,BM_SimpleLoop,4350.45,,-O0,Loops
3,BM_SimpleLoop,4360.8,,-O0,Loops
4,BM_SimpleLoop,4356.94,,-O0,Loops


In [9]:
# Drop the 'Iterations' column (empty in the preview above). Rebinding the result with
# errors='ignore' keeps this cell re-runnable: executing it a second time no longer
# raises KeyError once the column is already gone.
df = df.drop(columns=['Iterations'], errors='ignore')

In [10]:
df.head()

Unnamed: 0,Benchmark,CPU,Optimization,Category
0,BM_SimpleLoop,4540.38,-O0,Loops
1,BM_SimpleLoop,4396.42,-O0,Loops
2,BM_SimpleLoop,4350.45,-O0,Loops
3,BM_SimpleLoop,4360.8,-O0,Loops
4,BM_SimpleLoop,4356.94,-O0,Loops


In [13]:
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib

# Fixed seed so the train/test split and the forest are identical after every
# Restart Kernel -> Run All.
SEED = 42

df = pd.read_csv('cleaned_data.csv')

# Rows without a measured CPU time cannot be used as regression targets.
df = df.dropna(subset=['CPU'])

# Encode categorical columns as integer codes
df['Optimization'] = df['Optimization'].astype('category').cat.codes
df['Category'] = df['Category'].astype('category').cat.codes

# Feature and target
X = df[['Optimization', 'Category']]
y = df['CPU']

# NOTE(review): X_test / y_test are produced but not evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

model = RandomForestRegressor(random_state=SEED)
model.fit(X_train, y_train)

# Last expression of the cell: joblib.dump returns the list of written file names.
joblib.dump(model, 'optimizer_model.pkl')

['optimizer_model.pkl']

In [14]:
# clean.py - Improved version
import csv
import pandas as pd

def clean_data(input_file, output_file):
    """Filter a merged benchmark CSV and add a per-benchmark normalised CPU column.

    Rows are kept when the ``Benchmark`` name contains ``BM_``, is not an
    aggregate row (name ending in ``_mean``, ``_median``, ``_stddev`` or
    ``_cv``) and has a numeric ``CPU`` value.  ``Norm_CPU`` is the min-max
    normalised CPU time within each benchmark.

    Args:
        input_file: Path of the CSV to read; must have ``Benchmark`` and ``CPU`` columns.
        output_file: Path the cleaned CSV is written to (without the index).
    """
    df = pd.read_csv(input_file)

    # astype(str) turns missing names into the text 'nan', so the masks below are
    # plain booleans instead of containing NA (which boolean indexing rejects).
    names = df['Benchmark'].astype(str)
    is_benchmark = names.str.contains('BM_', regex=False)
    # Anchor on the suffix: an unanchored 'cv' or 'mean' would also discard real
    # benchmarks whose names merely contain those letters (e.g. BM_cvt).
    is_aggregate = names.str.contains(r'_(?:mean|median|stddev|cv)\s*$', regex=True)

    # Non-numeric CPU cells become NaN and are filtered out together with the rest.
    df['CPU'] = pd.to_numeric(df['CPU'], errors='coerce')
    df = df[is_benchmark & ~is_aggregate & df['CPU'].notna()].copy()

    # Min-max normalise within each benchmark. A benchmark whose runs all have the
    # same CPU time has zero spread; report 0.0 for it instead of 0/0 = NaN.
    per_benchmark = df.groupby('Benchmark')['CPU']
    lowest = per_benchmark.transform('min')
    spread = per_benchmark.transform('max') - lowest
    df['Norm_CPU'] = ((df['CPU'] - lowest) / spread.where(spread != 0)).fillna(0.0)

    df.to_csv(output_file, index=False)
    
# Script entry point: cleans "merged_benchmark_results.csv" into
# "cleaned_benchmark_results.csv" when this code is executed directly.
# NOTE(review): inside a notebook kernel __name__ is presumably "__main__" as well,
# so running this cell performs the cleaning immediately — confirm that is intended.
if __name__ == "__main__":
    clean_data("merged_benchmark_results.csv", "cleaned_benchmark_results.csv")

In [25]:
import tkinter as tk
from tkinter import ttk, scrolledtext, messagebox
import re
import time # Import the time module

class CppOptimizerGUI:
    def __init__(self, root):
        self.root = root
        self.root.title("Universal C++ Optimizer")
        self.root.geometry("1100x800")
        self.setup_ui()
        self.setup_styles()
    
    def setup_styles(self):
        """Select the 'clam' theme and register the ttk styles used by the widgets."""
        palette = ttk.Style()
        palette.theme_use('clam')

        panel_background = '#f0f0f0'
        # (style name, options) pairs, configured in this order.
        style_table = (
            ('TFrame', {'background': panel_background}),
            ('TLabel', {'background': panel_background, 'font': ('Arial', 10)}),
            ('Header.TLabel', {'font': ('Arial', 12, 'bold')}),
            ('TButton', {'font': ('Arial', 10)}),
            ('Green.TButton', {'foreground': 'white', 'background': '#2e7d32'}),
            ('Blue.TButton', {'foreground': 'white', 'background': '#1565c0'}),
            ('Red.TButton', {'foreground': 'white', 'background': '#c62828'}),
        )
        for style_name, options in style_table:
            palette.configure(style_name, **options)
    
    def setup_ui(self):
        """Build the main window: the C++ editor on top and a row of action buttons below.

        Stores the editor widget in ``self.code_input``.
        """
        container = ttk.Frame(self.root, padding="10")
        container.pack(fill=tk.BOTH, expand=True)

        # Editor area
        editor_box = ttk.LabelFrame(container, text="Enter C++ Code", padding="10")
        editor_box.pack(fill=tk.BOTH, expand=True, pady=(0, 10))

        editor_options = dict(
            wrap=tk.NONE,
            font=('Consolas', 10),
            undo=True,
            width=100,
            height=25,
        )
        self.code_input = scrolledtext.ScrolledText(editor_box, **editor_options)
        self.code_input.pack(fill=tk.BOTH, expand=True)

        # Action buttons: each entry is (button options, pack options), created left to
        # right in this order; "Help" is anchored to the right edge.
        toolbar = ttk.Frame(container)
        toolbar.pack(fill=tk.X, pady=(5, 0))

        button_specs = (
            (dict(text="Optimize Code", style='Green.TButton', command=self.optimize_code),
             dict(side=tk.LEFT, padx=(0, 10))),
            (dict(text="Clear", style='Red.TButton', command=self.clear_code),
             dict(side=tk.LEFT, padx=(0, 10))),
            (dict(text="Insert Sample", style='Blue.TButton', command=self.insert_sample_code),
             dict(side=tk.LEFT)),
            (dict(text="Help", command=self.show_help),
             dict(side=tk.RIGHT)),
        )
        for widget_options, pack_options in button_specs:
            ttk.Button(toolbar, **widget_options).pack(**pack_options)
    
    def insert_sample_code(self):
        sample_code = """#include <iostream>
#include <vector>

using namespace std;

// Function to calculate square
int square(int x) {
    return x * x;
}

// Function with loop
void process_array(int* arr, int size) {
    for (int i = 0; i < size; i++) {
        if (i % 2 == 0) {
            arr[i] = square(i) * 2;
        } else {
            arr[i] = square(i) / 2;
        }
    }
}

int main() {
    const int n = 100;
    int data[n];
    
    // Process the array
    process_array(data, n);
    
    // Calculate sum
    int sum = 0;
    for (int i = 0; i < n; i++) {
        sum += data[i];
    }
    
    cout << "Final sum: " << sum << endl;
    return 0;
}"""
        self.code_input.delete("1.0", tk.END)
        self.code_input.insert("1.0", sample_code)
    
    def clear_code(self):
        """Remove all text from the code editor."""
        editor = self.code_input
        editor.delete("1.0", tk.END)
    
    def show_help(self):
        help_text = """HOW TO USE THIS OPTIMIZER:

1. Enter your C++ code in the input box
2. Click "Optimize Code" to apply optimizations
3. View the optimized code and explanations

SUPPORTED OPTIMIZATIONS:
- Loop unrolling (factors 2, 4, 8)
- Function inlining for small functions
- Memory access pattern optimization
- Branch prediction improvements
- Strength reduction (cheaper math ops)
- Common subexpression elimination

NOTE:
This tool performs source-to-source transformations.
For best results, combine with compiler optimizations (-O2, -O3)."""
        
        messagebox.showinfo("Optimizer Help", help_text)
    
    def optimize_code(self):
        """Run the optimization pipeline on the editor contents and show the results.

        Shows a warning dialog and returns when the editor is empty.  Any exception
        raised while optimizing or displaying the results is reported in an error
        dialog instead of propagating.
        """
        code = self.code_input.get("1.0", tk.END).strip()
        if not code:
            messagebox.showwarning("Error", "Please enter C++ code to optimize")
            return

        try:
            # Placeholder "execution time" of the original program. Nothing is compiled
            # or run: the figure is derived deterministically from the code length only.
            original_code_exec_time = len(code) * 0.0001 + 0.01

            # perf_counter() is monotonic and high-resolution, unlike the wall clock
            # time.time(), whose coarse resolution on some platforms makes a fast
            # pipeline run show up as 0.0000 seconds.
            start_time = time.perf_counter()
            optimized = self.apply_optimization_pipeline(code)
            optimization_duration = time.perf_counter() - start_time

            # Placeholder for the optimized program: same length heuristic with half
            # the per-character cost, i.e. the "speedup" is assumed, not measured.
            optimized_code_exec_time = len(optimized) * 0.00005 + 0.005

            self.show_results(code, optimized, optimization_duration, original_code_exec_time, optimized_code_exec_time)
        except Exception as e:
            messagebox.showerror("Optimization Error", f"Failed to optimize code:\n{str(e)}")
    
    def apply_optimization_pipeline(self, code):
        """Main optimization pipeline"""
        optimizations = [
            self.optimize_includes,
            self.optimize_functions,
            self.optimize_loops,
            self.optimize_memory,
            self.optimize_branches,
            self.optimize_math,
            self.cleanup_code
        ]
        
        optimized = code
        for optimize in optimizations:
            # Each optimization function is responsible for returning the modified code
            optimized = optimize(optimized)
        
        return optimized
    
    def optimize_includes(self, code):
        """
        Organize and optimize #includes.
        Limitation: This is a simple textual reordering and de-duplication.
        It does not check for include dependencies or redundant includes based on usage.
        """
        includes = re.findall(r'#include\s+[<"].*?[>"]', code)
        unique_includes = sorted(list(set(includes)))
        
        if unique_includes:
            # Remove all existing includes
            code = re.sub(r'#include\s+[<"].*?[>"]', '', code)
            # Add optimized includes at top
            code = '\n'.join(unique_includes) + '\n' + code
        
        return code
    
    def optimize_functions(self, code):
        """
        Inline small functions.
        Limitation: This is a heuristic based on line count and absence of control flow.
        It does not perform proper call graph analysis, parameter substitution,
        or handle complex return types/side effects correctly.
        Can lead to incorrect code if not carefully applied.
        """
        function_defs = []
        replacements = []

        # First pass: Identify inlinable functions and their definitions
        # Regex to find function definitions. It tries to capture return type, name, parameters, and body.
        # It's a simplified regex and might not handle all C++ function syntaxes (e.g., templates, attributes).
        func_def_pattern = r'(\w+(?:\s*::\s*\w+)*)\s+(\w+)\(([^)]*)\)\s*\{([^{}]*(\{[^{}]*\}[^{}]*)*)\}'
        for match in re.finditer(func_def_pattern, code, re.DOTALL):
            full_match_str = match.group(0)
            name = match.group(2)
            params = match.group(3)
            body = match.group(4)
            
            body_lines = [line.strip() for line in body.split('\n') if line.strip()]
            # Criteria for inlining: small body and no complex control flow
            if len(body_lines) <= 5 and not any(x in body for x in ['for', 'while', 'if', 'switch', 'goto', 'try', 'catch']):
                param_names = [p.strip().split()[-1] for p in params.split(',') if p.strip()]
                function_defs.append({
                    'name': name,
                    'params': param_names,
                    'body': body,
                    'start': match.start(),
                    'end': match.end(),
                    'full_match_str': full_match_str
                })
        
        # Second pass: Find calls to these inlinable functions and prepare replacements
        for func_info in function_defs:
            func_name = func_info['name']
            func_params = func_info['params']
            func_body = func_info['body']

            # Find all calls to this specific function
            # Use negative lookbehind to avoid matching function definitions themselves or declarations
            call_pattern = rf'(?<!\w){re.escape(func_name)}\s*\(([^)]*)\)'
            for call_match in re.finditer(call_pattern, code):
                # Ensure it's not the function definition itself
                # This check is crucial to prevent self-inlining or modifying the definition itself
                if call_match.start() >= func_info['start'] and call_match.end() <= func_info['end']:
                    continue 

                args_str = call_match.group(1)
                args = [a.strip() for a in args_str.split(',') if a.strip()]
                
                inlined_body_text = self._get_inlined_body(func_body, func_params, args)
                
                replacements.append((call_match.start(), call_match.end(), inlined_body_text))
            
            # After processing all calls, mark the original function definition for removal
            replacements.append((func_info['start'], func_info['end'], f"// INLINED: {func_name} - Original function removed by optimizer"))

        # Apply all replacements from end to start to avoid index issues
        replacements.sort(key=lambda x: x, reverse=True) # Sort by start index descending
        for start, end, new_text in replacements:
            code = code[:start] + new_text + code[end:]
        
        return code
    
    def _get_inlined_body(self, body, params, args):
        """Helper to generate inlined body text."""
        inlined_body_content = body.strip()
        
        # Simple return statement handling: if body is 'return expr;', extract expr
        return_match = re.match(r'return\s*(.*?);', inlined_body_content)
        if return_match:
            # If it's a simple return, the inlined content is just the expression
            inlined_content = return_match.group(1).strip()
            # Wrap in parentheses to maintain operator precedence if used in an expression context
            final_inlined_text = f"({inlined_content})"
        else:
            # If not a simple return (multiple statements or no return), wrap in a block
            # Ensure proper indentation for the inlined block
            indented_body_lines = [f"    {line}" for line in inlined_body_content.split('\n') if line.strip()]
            final_inlined_text = f"{{\n" + "\n".join(indented_body_lines) + "\n}}"

        # Substitute parameters with arguments
        for param, arg in zip(params, args):
            # Use word boundaries to prevent partial matches within other variable names
            final_inlined_text = re.sub(rf'\b{param}\b', arg, final_inlined_text)
        
        return final_inlined_text
    
    def optimize_loops(self, code):
        """
        Apply loop unrolling for simple for loops.
        Limitation: Extremely limited. Only handles:
        - 'for' loops with 'int i = START; i < END; i++' structure.
        - Constant start and end values.
        - Loop body with no 'if', 'switch', 'break', 'continue' or nested loops.
        - Does not handle complex loop conditions, non-integer iterators, or step values other than 1.
        Can significantly increase code size and might not always be beneficial.
        """
        loops = re.finditer(
            r'for\s*\(\s*(.*?)\s*;\s*(.*?)\s*;\s*(.*?)\s*\)\s*\{([^{}]*(\{[^{}]*\}[^{}]*)*)\}',
            code, re.DOTALL)
        
        replacements = []
        for loop in loops:
            init = loop.group(1).strip()
            cond = loop.group(2).strip()
            update = loop.group(3).strip()
            body = loop.group(4).strip()
            
            # Check for simple counted loop pattern: int var = X; var < Y; var++
            var_init_match = re.match(r'int\s+(\w+)\s*=\s*(\d+);', init)
            var_cond_match = re.match(r'(\w+)\s*<\s*(\d+)', cond)
            var_update_match = re.match(r'(\w+)\s*\+\+\s*;?', update)

            if (var_init_match and var_cond_match and var_update_match and
                var_init_match.group(1) == var_cond_match.group(1) and
                var_init_match.group(1) == var_update_match.group(1) and
                not any(x in body for x in ['if', 'switch', 'break', 'continue', 'for', 'while'])):
                
                var = var_init_match.group(1)
                try:
                    start = int(var_init_match.group(2))
                    end = int(var_cond_match.group(2))
                except ValueError:
                    continue # Skip if start/end are not simple integers

                loop_iterations = end - start
                factor = 0
                if loop_iterations >= 8 and loop_iterations % 8 == 0:
                    factor = 8
                elif loop_iterations >= 4 and loop_iterations % 4 == 0:
                    factor = 4
                elif loop_iterations >= 2 and loop_iterations % 2 == 0:
                    factor = 2

                if factor > 1 and loop_iterations > 0: # Only unroll if a suitable factor is found and loop is valid
                    unrolled_code = self.unroll_loop(var, start, end, factor, body)
                    replacements.append((loop.start(), loop.end(), unrolled_code))
        
        # Apply replacements from end to start to avoid index issues
        replacements.sort(key=lambda x: x, reverse=True)
        for start, end, new_text in replacements:
            code = code[:start] + new_text + code[end:]

        return code
    
    def unroll_loop(self, var, start, end, factor, body):
        """Generate unrolled loop code."""
        unrolled_lines = []
        indent = '    ' # Assuming 4 spaces for indentation

        # Process full unrolled blocks
        for i in range(start, end - (end - start) % factor, factor):
            for j in range(factor):
                current_iteration_body = body.replace(var, str(i + j)).strip()
                if current_iteration_body: # Only add non-empty lines
                    unrolled_lines.extend([indent + line for line in current_iteration_body.split('\n') if line.strip()])
        
        # Add remainder handling if needed
        if (end - start) % factor!= 0:
            unrolled_lines.append(f"\n{indent}// Remainder iterations for {var}")
            for i in range(end - (end - start) % factor, end):
                current_iteration_body = body.replace(var, str(i)).strip()
                if current_iteration_body: # Only add non-empty lines
                    unrolled_lines.extend([indent + line for line in current_iteration_body.split('\n') if line.strip()])
        
        return '\n'.join(unrolled_lines)
    
    def optimize_memory(self, code):
        """
        Suggests potential memory access optimizations.
        Limitation: This is a very simplistic heuristic based on regex.
        It cannot perform actual data flow analysis, cache simulation, or
        detect complex access patterns (e.g., stride, irregular access).
        It merely adds comments for manual review.
        """
        loop_pattern = r'for\s*\([^)]*\)\s*\{([^{}]*(\{[^{}]*\}[^{}]*)*)\}'
        
        def replace_with_comment(match):
            original = match.group(0)
            # Only comment if it's an array access with a simple variable index or variable +/- constant
            if re.match(r'\w+\s*\[\s*(\w+|\w+\s*[+-]\s*\d+)\s*\]', original):
                return f"/* OPT: Consider cache-friendly access if this is a hot loop */ {original}"
            return original

        # Iterate over each loop body to find potential optimizations
        # This is a two-pass approach: find loops, then find array accesses within those loops.
        # This is still very basic and prone to false positives/negatives.
        modified_code = code
        for loop_match in re.finditer(loop_pattern, code, re.DOTALL):
            loop_body = loop_match.group(1)
            modified_loop_body = re.sub(r'(\w+)\s*\[([^]]+)\]', replace_with_comment, loop_body)
            # Replace the original loop body with the modified one in the overall code
            modified_code = modified_code.replace(loop_body, modified_loop_body)

        return modified_code
    
    def optimize_branches(self, code):
        """
        Optimize conditional branches by converting simple if-else to ternary.
        Limitation: Only works for single-line assignments in both if and else blocks
        where the same variable is assigned. Does not handle complex expressions,
        multiple statements, or side effects within the branches.
        """
        # Regex to find if-else structures with single-line bodies
        pattern = r'if\s*\((.*?)\)\s*\{\s*(\w+\s*=\s*[^;]+;\s*)\}\s*else\s*\{\s*(\w+\s*=\s*[^;]+;\s*)\}'
        
        def replace_with_ternary(match):
            condition = match.group(1).strip()
            if_stmt = match.group(2).strip()
            else_stmt = match.group(3).strip()

            # Extract variable and values
            if_var_match = re.match(r'(\w+)\s*=\s*(.*?);', if_stmt)
            else_var_match = re.match(r'(\w+)\s*=\s*(.*?);', else_stmt)

            if if_var_match and else_var_match and if_var_match.group(1) == else_var_match.group(1):
                var = if_var_match.group(1)
                if_val = if_var_match.group(2).strip()
                else_val = else_var_match.group(2).strip()
                return f"{var} = {condition}? {if_val} : {else_val};"
            return match.group(0) # No change if pattern not met or variables don't match
        
        code = re.sub(pattern, replace_with_ternary, code, flags=re.DOTALL)
        
        return code
    
    def optimize_math(self, code):
        """
        Apply math optimizations (strength reduction, common subexpression elimination).
        Limitation: Very basic strength reduction. Common subexpression elimination
        is extremely difficult with regex and is not reliably implemented here.
        I/O optimization (cout to printf) is a textual replacement and might require
        manual inclusion of <cstdio> and careful type formatting for printf.
        """
        replacements = [
            # Strength reduction: pow(x,2) -> x*x
            (r'pow\s*\(\s*([a-zA-Z_]\w*)\s*,\s*2\s*\)', r'(\1 * \1)'),
            # Strength reduction: x + x instead of x * 2
            (r'([a-zA-Z_]\w*)\s*\*\s*2\b', r'(\1 + \1)'),
            # Strength reduction: x * 0.5 instead of x / 2
            (r'([a-zA-Z_]\w*)\s*\/\s*2\b', r'(\1 * 0.5)'),
            # I/O optimization (cout to printf for raw speed, though stdio sync is a factor)
            # This is a very simplistic replacement and assumes the argument is a string or can be formatted as such.
            # For integers, floats, etc., the format specifier (%d, %f) would be needed.
            (r'std::cout\s*<<\s*(.*?)\s*<<\s*std::endl;', r'printf("%s\\n", \1); /* NOTE: requires #include <cstdio> and correct format specifier */'),
            (r'std::cout\s*<<\s*(.*?);', r'printf("%s", \1); /* NOTE: requires #include <cstdio> and correct format specifier */'),
        ]
        
        for pattern, repl in replacements:
            code = re.sub(pattern, repl, code)

        # Simple common subexpression elimination (very basic, needs proper AST for real CSE)
        # This is a very naive regex-based CSE and might have false positives or negatives.
        # Example: a = b + c; d = b + c; -> a = b + c; d = a;
        # This requires more complex parsing than regex can provide reliably.
        # For this demonstration, we'll keep it simple or note its limitations.
        # Let's add a comment for a potential CSE.
        # code = re.sub(r'(\b\w+\s*=\s*([^;]+;))\n(\s*\b\w+\s*=\s*\2)', r'\1\n/* OPT: Possible CSE */\n\3', code)

        return code
    
    def cleanup_code(self, code):
        """
        Clean up and format the final code.
        Limitation: This is a very basic textual cleanup.
        It does not perform proper C++ code formatting or indentation.
        For robust formatting, external tools like clang-format are recommended.
        """
        # Remove multiple empty lines
        code = re.sub(r'\n{3,}', '\n\n', code)
        # Trim trailing whitespace
        code = '\n'.join(line.rstrip() for line in code.split('\n'))
        # Remove lines that are just whitespace
        code = re.sub(r'^\s*$', '', code, flags=re.MULTILINE) 
        
        # Basic attempts at adding newlines for readability (can be imperfect)
        code = re.sub(r'\{', '{\n', code) # Add newline after {
        code = re.sub(r'\}', '\n}\n', code) # Add newline before and after }
        code = re.sub(r';', ';\n', code) # Add newline after ;

        # Remove extra blank lines created by the above
        code = re.sub(r'\n\s*\n', '\n\n', code) 
        
        return code
    
    def show_results(self, original, optimized, optimization_duration, original_exec_time, optimized_exec_time):
        """Open a results window with three read-only tabs.

        The tabs show the original code, the optimized code and the report
        produced by ``generate_optimization_log`` for the given timings.
        """
        result_window = tk.Toplevel(self.root)
        result_window.title("Optimization Results")
        result_window.geometry("1200x800")

        tabs = ttk.Notebook(result_window)
        tabs.pack(fill=tk.BOTH, expand=True)

        def build_report():
            return self.generate_optimization_log(
                original,
                optimized,
                optimization_duration,
                original_exec_time,
                optimized_exec_time
            )

        # (tab title, wrap mode, content provider). The content is requested only
        # after the tab's text widget exists.
        tab_specs = (
            ("Original Code", tk.NONE, lambda: original),
            ("Optimized Code", tk.NONE, lambda: optimized),
            ("Optimization Log", tk.WORD, build_report),
        )
        for title, wrap_mode, get_content in tab_specs:
            page = ttk.Frame(tabs)
            tabs.add(page, text=title)

            viewer = scrolledtext.ScrolledText(
                page,
                wrap=wrap_mode,
                font=('Consolas', 10),
                width=80,
                height=35
            )
            viewer.insert("1.0", get_content())
            viewer.config(state='disabled')  # read-only
            viewer.pack(fill=tk.BOTH, expand=True)
    
    def generate_optimization_log(self, original, optimized, optimization_duration, original_exec_time, optimized_exec_time):
        """Generate a summary of optimizations performed"""
        log = "=== OPTIMIZATION REPORT ===\n\n"
        
        # Time taken for the optimization process itself
        log += f"Time to apply optimizations: {optimization_duration:.4f} seconds\n\n"

        # Simulated execution times
        log += "SIMULATED CODE EXECUTION TIMES (Conceptual):\n"
        log += "NOTE: These are *estimates* based on code length and heuristics. Actual performance\n"
        log += "depends heavily on your compiler, hardware, and runtime environment.\n"
        log += f"  Original Code (Estimated): {original_exec_time:.6f} seconds\n"
        log += f"  Optimized Code (Estimated): {optimized_exec_time:.6f} seconds\n"
        if original_exec_time > 0:
            speedup = original_exec_time / optimized_exec_time
            log += f"  Estimated Speedup: {speedup:.2f}x\n"
        log += "\n" # Add a newline for better readability
        
        # Count lines before/after
        orig_lines = len([line for line in original.split('\n') if line.strip()]) # Count non-empty lines
        opt_lines = len([line for line in optimized.split('\n') if line.strip()]) # Count non-empty lines
        log += f"Lines of code (non-empty): {orig_lines} → {opt_lines}\n\n"
        
        # Detect optimizations
        optimizations = []
        
        if "UNROLLED" in optimized and "UNROLLED" not in original:
            optimizations.append("Loop unrolling applied")
        
        if "INLINED" in optimized and "INLINED" not in original:
            optimizations.append("Function inlining applied")
        
        if "? :" in optimized and "? :" not in original:
            optimizations.append("Branch optimization (ternary operators) applied")
        
        # Check for cout to printf transformation. This assumes printf is not in original and cout is.
        if "printf(" in optimized and "std::cout" in original and "printf(" not in original:
            optimizations.append("I/O optimization (cout → printf) applied")
        
        if "/* OPT:" in optimized and "/* OPT:" not in original:
            optimizations.append("Memory access optimizations suggested")

        if "pow(" in original and "pow(" not in optimized and "* *" in optimized:
            optimizations.append("Strength reduction (pow(x,2) -> x*x) applied")

        if " * 2" in original and "+ +" in optimized:
            optimizations.append("Strength reduction (x*2 -> x+x) applied")

        if " / 2" in original and "* 0.5" in optimized:
            optimizations.append("Strength reduction (x/2 -> x*0.5) applied")


        if optimizations:
            log += "APPLIED OPTIMIZATIONS:\n"
            log += '\n'.join(f"• {opt}" for opt in optimizations)
        else:
            log += "No major optimizations could be applied automatically.\n"
            log += "Try writing more optimization-friendly code (simple loops, small functions with clear return values).\n"
            log += "Complex code structures (e.g., nested loops with complex conditions, dynamic memory, virtual functions) are harder for this simple regex-based optimizer to handle."
        
        log += "\n\nIMPORTANT CONSIDERATIONS & LIMITATIONS:\n"
        log += "• This tool performs **source-to-source transformations using regular expressions**. It does NOT parse C++ code semantically.\n"
        log += "• **Risk of Incorrect Transformations:** Regex cannot fully understand C++ grammar, context, or semantics (e.g., type information, scope, macros, operator overloading).\n"
        log += "  This means transformations might introduce subtle bugs, alter program logic, or produce invalid C++ code.\n"
        log += "• **Always Compile and Test:** You MUST compile the optimized code with a C++ compiler (e.g., g++, clang++) and thoroughly test its correctness and performance.\n"
        log += "• **Combine with Compiler Optimizations:** For best results, always combine this tool's output with compiler flags like `-O2`, `-O3`, or Link Time Optimization (LTO).\n"
        log += "• **Profiling is Key:** Actual performance improvements should always be verified using profiling tools (e.g., Valgrind, gprof, perf) on your target hardware.\n"
        log += "• **Manual Review:** Carefully review all changes, especially for complex code sections.\n"
        
        return log

# Entry point: create the Tk root window, attach the optimizer GUI to it and enter
# the Tk event loop when this code is executed directly.
if __name__ == "__main__":
    root = tk.Tk()
    app = CppOptimizerGUI(root)
    root.mainloop()
