In [1]:
import pandas as pd          
import math
import matplotlib.pyplot as plt        
from sklearn.model_selection import train_test_split  
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor 
from sklearn.metrics import r2_score     

def process_csv(file_paths):
    # Initialize lists to store IPC values from each file
    all_ipc_values = []

    for file_path in file_paths:
        # Load the CSV file
        df = pd.read_csv(file_path)
        
        # Replace all non-numeric values with 0
        df = df.apply(pd.to_numeric, errors='coerce').fillna(0)
        
        # Calculate the number of instructions for 0.01% of instructions
        instruction_sum = df['instruction'].sum()
        inst_per_chunk = int(math.ceil(instruction_sum * 0.0001))
        
        chunk_inst_value = 0
        ipc_values = []
        row_start = 0
        
        for start_row in range(0, len(df)):
            chunk_inst_value += df.iloc[start_row]['instruction']
            
            if chunk_inst_value >= inst_per_chunk:
                chunk_data = df.iloc[row_start:start_row].sum()
                ipc_values.append(chunk_data)
                chunk_inst_value = 0
                row_start = start_row

        
        # Handle last chunk
        chunk_data = df.iloc[row_start:len(df)].sum()
        ipc_values.append(chunk_data)

        # Create CSV
        ipc_df = pd.DataFrame(ipc_values)
        ipc_df.to_csv('inst_aligned_' + file_path + '.csv', index=False)
    
    # Return IPC values for all files
    return all_ipc_values

# List of CSV files for In Order and Out of Order
in_order_files = ['InO_500.csv', 'InO_502.csv', 'InO_505.csv', 'InO_520.csv', 'InO_523.csv', 'InO_525.csv', 'InO_531.csv', 'InO_541.csv', 'InO_548.csv', 'InO_557.csv','InO_503.csv', 'InO_507.csv', 'InO_508.csv', 'InO_510.csv', 'InO_511.csv', 'InO_519.csv', 'InO_521.csv', 'InO_526.csv', 'InO_527.csv', 'InO_538.csv', 'InO_544.csv', 'InO_549.csv', 'InO_554.csv']
out_of_order_files = ['OOO_500.csv', 'OOO_502.csv', 'OOO_505.csv', 'OOO_520.csv', 'OOO_523.csv', 'OOO_525.csv', 'OOO_531.csv', 'OOO_541.csv', 'OOO_548.csv', 'OOO_557.csv','OOO_503.csv', 'OOO_507.csv', 'OOO_508.csv', 'OOO_510.csv', 'OOO_511.csv', 'OOO_519.csv', 'OOO_521.csv', 'OOO_526.csv', 'OOO_527.csv', 'OOO_538.csv', 'OOO_544.csv', 'OOO_549.csv', 'OOO_554.csv']

# Process the CSV files
result1 = process_csv(in_order_files)  #In Order CSV files
result2 = process_csv(out_of_order_files)  #Out of Order CSV files



  df = pd.read_csv(file_path)


1936058136239.0
193605814
1936058136239.0


  df = pd.read_csv(file_path)


773747686501.0
77374769
773747686501.0


  df = pd.read_csv(file_path)


414930894533.0
41493090
414930894533.0


  df = pd.read_csv(file_path)


758203343185.0
75820335
758203343185.0


  df = pd.read_csv(file_path)


733926031153.0
73392604
733926031153.0


  df = pd.read_csv(file_path)


1607942508617.0
160794251
1607942508617.0


  df = pd.read_csv(file_path)


1240116642209.0
124011665
1240116642209.0


  df = pd.read_csv(file_path)


1503160549236.0
150316055
1503160549236.0


  df = pd.read_csv(file_path)


2500846446596.0
250084645
2500846446596.0


  df = pd.read_csv(file_path)


1270841373203.0
127084138
1270841373203.0


  df = pd.read_csv(file_path)


766567321214.0
76656733
766567321214.0


  df = pd.read_csv(file_path)


960073559711.0
96007356
960073559711.0


  df = pd.read_csv(file_path)


1133792678838.0
113379268
1133792678838.0


  df = pd.read_csv(file_path)


1610700069137.0
161070007
1610700069137.0


  df = pd.read_csv(file_path)


2342822433073.0
234282244
2342822433073.0


  df = pd.read_csv(file_path)


486878334503.0
48687834
486878334503.0


  df = pd.read_csv(file_path)


1003502071245.0
100350208
1003502071245.0


  df = pd.read_csv(file_path)


886700243593.0
88670025
886700243593.0


  df = pd.read_csv(file_path)


1154250394937.0
115425040
1154250394937.0


  df = pd.read_csv(file_path)


2672821466996.0
267282147
2672821466996.0


  df = pd.read_csv(file_path)


1194623428669.0
119462343
1194623428669.0


  df = pd.read_csv(file_path)


630065970005.0
63006598
630065970005.0


  df = pd.read_csv(file_path)


573666026680.0
57366603
573666026680.0


  df = pd.read_csv(file_path)


2031166010414.0
203116602
2031166010414.0
788020230751.0
78802024
788020230751.0


  df = pd.read_csv(file_path)


588627566618.0
58862757
588627566618.0


  df = pd.read_csv(file_path)


664062936199.0
66406294
664062936199.0


  df = pd.read_csv(file_path)


789233840573.0
78923385
789233840573.0
1778982954606.0
177898296
1778982954606.0


  df = pd.read_csv(file_path)


1398797024873.0
139879703
1398797024873.0
1694131353250.0
169413136
1694131353250.0


  df = pd.read_csv(file_path)


2818161632914.0
281816164
2818161632914.0
1285081234933.0
128508124
1285081234933.0
1101113765838.0
110111377
1101113765838.0
876522334880.0
87652234
876522334880.0
1294405613760.0
129440562
1294405613760.0
2092135104611.0
209213511
2092135104611.0
2634627308688.0
263462731
2634627308688.0
746589762732.0
74658977
746589762732.0
1290706678362.0
129070668
1290706678362.0


  df = pd.read_csv(file_path)


941214576609.0
94121458
941214576609.0


  df = pd.read_csv(file_path)


1065507044695.0
106550705
1065507044695.0


  df = pd.read_csv(file_path)


3044613869471.0
304461387
3044613869471.0
1341739810464.0
134173982
1341739810464.0
908746960707.0
90874697
908746960707.0
836225938014.0
83622594
836225938014.0
