In [2]:
import pandas as pd

def aggregate_runtime_results(csv_file, group_keys=None):
    """
    Read a runtime-experiment results CSV and average it per test scenario.

    The CSV is expected to have columns such as:
        'J', 'K', 'N', 'seed', 'avg_runtime', 'prop_identifiable',
        'prop_non_identifiable', 'total_time', 'count_direct_flag', 'prop_direct_flag',
        'count_brute_flag', 'prop_brute_flag', and branch-specific counts/proportions.

    Rows are grouped by the scenario keys (by default 'J', 'K' and 'N', i.e.
    averaging across different seeds) and the mean of every remaining numeric
    column is computed. Non-numeric columns are left out of the result instead
    of making the aggregation fail.

    Parameters:
        csv_file (str): Path to the CSV file.
        group_keys (sequence of str, optional): Column names that identify a
            scenario. Defaults to ['J', 'K', 'N'].

    Returns:
        pd.DataFrame: One row per distinct key combination, with the grouping
        keys as ordinary columns followed by the averaged numeric columns.

    Raises:
        KeyError: If any grouping key is not a column of the CSV.
    """
    # Resolve the default here (not in the signature) to avoid a shared mutable default.
    if group_keys is None:
        group_keys = ['J', 'K', 'N']
    else:
        group_keys = list(group_keys)

    # Read the CSV into a pandas DataFrame.
    df = pd.read_csv(csv_file)

    # Fail early with a message that names the file and the missing columns.
    missing = [key for key in group_keys if key not in df.columns]
    if missing:
        raise KeyError(
            f"Grouping column(s) {missing} not found in {csv_file!r}; "
            f"available columns: {list(df.columns)}"
        )

    # numeric_only=True keeps the aggregation to numeric columns; without it,
    # pandas >= 2.0 raises TypeError as soon as a text column is present.
    aggregated = df.groupby(group_keys).mean(numeric_only=True).reset_index()

    return aggregated

# Demo run: aggregate one results file and print the per-scenario means.
if __name__ == "__main__":
    # Point this at the results file to summarise.
    csv_file = "runtime_expr_results_J30_K4.csv"
    agg_results = aggregate_runtime_results(csv_file)
    # A single print call with a newline separator emits the heading and the
    # table on consecutive lines.
    print("Aggregated runtime results:", agg_results, sep="\n")


ParserError: Error tokenizing data. C error: Expected 14 fields in line 181, saw 16


In [5]:
import csv

# Read the results file with the csv module (which accepts rows of differing
# lengths) and keep every row with its last field removed.
processed_rows = []
with open('runtime_expr_results_J30_K4.csv', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)

    # Process each row by slicing off the last column
    for row in csv_reader:
        processed_row = row[:-1]
        # Store the trimmed row; previously it was overwritten on every
        # iteration, so the cell produced nothing usable.
        processed_rows.append(processed_row)



In [11]:
# Show `df` as the cell's output. `df` is not assigned in this cell, so it must
# already exist in the kernel from another cell.
# NOTE(review): confirm which cell defines it so the notebook survives
# Restart & Run All.
df

Unnamed: 0,J,K,N,seed,avg_runtime,prop_identifiable,prop_non_identifiable,total_time,count_direct_flag,prop_direct_flag,count_brute_flag,prop_brute_flag,count_generator_empty
0,30,4,10,1,0.008772563934326173,1.0,0.0,0.08772563934326172,5,0.5,5,0.5,10
1,30,4,10,7,0.0120802640914917,1.0,0.0,0.12080264091491699,6,0.6,4,0.4,10
2,30,4,10,0,0.00687713623046875,1.0,0.0,0.0687713623046875,5,0.5,5,0.5,10
3,30,4,10,5,0.006256222724914551,1.0,0.0,0.06256222724914551,5,0.5,5,0.5,10
4,30,4,10,2,0.01937856674194336,1.0,0.0,0.1937856674194336,6,0.6,4,0.4,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...
987,30,4,10,990,0.00761406421661377,1.0,0.0,0.0761406421661377,3,0.3,7,0.7,10
988,30,4,10,992,0.009774112701416015,1.0,0.0,0.09774112701416016,4,0.4,6,0.6,10
989,30,4,10,993,0.011006712913513184,1.0,0.0,0.11006712913513184,3,0.3,7,0.7,10
990,30,4,10,994,0.021950602531433105,1.0,0.0,0.21950602531433105,3,0.3,7,0.7,10


In [17]:
import csv
import pandas as pd


# Peek at the header row only, then load just the first 13 columns it names.
header = list(pd.read_csv('runtime_expr_results_J30_K4.csv', nrows=0).columns)
cols_to_use = header[0:13]
df = pd.read_csv('runtime_expr_results_J30_K4.csv', usecols=cols_to_use)

# Coerce each column to a numeric dtype; entries that cannot be parsed turn into NaN.
df_numeric = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

# Column-wise averages of the coerced data.
means = df_numeric.mean()
print(means)


J                         30.000000
K                          4.000000
N                         10.000000
seed                     495.790698
avg_runtime                0.077983
prop_identifiable          0.998787
prop_non_identifiable      0.001213
total_time                 0.779830
count_direct_flag          5.319515
prop_direct_flag           0.531951
count_brute_flag           4.668352
prop_brute_flag            0.466835
count_generator_empty      9.979778
dtype: float64


In [18]:
import csv
import pandas as pd


# Peek at the header row only, then load just the first 13 columns it names.
header = list(pd.read_csv('runtime_expr_results_J20_K4.csv', nrows=0).columns)
cols_to_use = header[0:13]
df = pd.read_csv('runtime_expr_results_J20_K4.csv', usecols=cols_to_use)

# Coerce each column to a numeric dtype; entries that cannot be parsed turn into NaN.
df_numeric = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

# Column-wise averages of the coerced data.
means = df_numeric.mean()
print(means)


J                         20.000000
K                          4.000000
N                         10.000000
seed                     494.526946
avg_runtime                0.318599
prop_identifiable          0.963673
prop_non_identifiable      0.036327
total_time                 3.185993
count_direct_flag          2.602794
prop_direct_flag           0.260279
count_brute_flag           7.033932
prop_brute_flag            0.703393
count_generator_empty      9.359281
dtype: float64


In [16]:
# Calculate the mean only for numeric columns
# `df` is not assigned in this cell; it comes from whichever cell ran before it.
# numeric_only=True leaves non-numeric-dtype columns out of the result, so an
# empty Series means no column of `df` had a numeric dtype — presumably the
# values were read as strings; check df.dtypes (TODO confirm).
means = df.mean(numeric_only=True)
print(means)

Series([], dtype: float64)


In [8]:
import pandas as pd
# Attempt a plain read of the results file; a tokenizer failure is reported
# instead of raised, and the preview is printed only when the read succeeds.
try:
    df = pd.read_csv("runtime_expr_results_J30_K4.csv")
except pd.errors.ParserError as parse_err:
    print("ParserError:", parse_err)
else:
    print(df.head())


ParserError: Error tokenizing data. C error: Expected 14 fields in line 181, saw 16

