# Transform the data

## Objective: 
Create a pipeline to transform the log data into a dataframe we can use for predictive modelling.

## Table format: Features

### Input Features

- $ n $: Number of elements (e.g., 16, 31).
- $ k $: Number of partitions (e.g., 5, 4).
- Total sum: $ \sum S $ (requires input numbers).
- Variance: $ \text{var}(S) $.
- Skewness: Asymmetry of the distribution of the input numbers.
- Max/min number.
- Average subset sum: $ \text{total sum} / k $.

### Solver Features (First $ k $ Logs at stackDepth=3):

**For each log (up to $ k $):**

- evts: Events at stackDepth=3.
- expandEvts: Expansions.
- pruneBacktrackEvts: Pruning backtracks.
- backtrackEvts: Non-pruning backtracks.
- strengthenEvts: Constraint tightenings.
- maxStackDepth: Maximum depth reached.
- Subset sums: Sum of numbers assigned to each subset based on path (requires input numbers).
- Subset sum variance: Variance of subset sums.
- Aggregated: Average and max of evts, expandEvts, pruneBacktrackEvts across the first $ k $ logs.
- num_stackdepth3_logs: Number of stackDepth=3 entries among the first $ k $ logs (proxy for search difficulty).

### Termination/Timeout Features
- final_expandEvts: expandEvts of the last log entry (target variable).
- Censored flag: 1 for timeouts, 0 for completions.
- Objective value: maxsum - minsum (if available, e.g., 2 for $ n=10, k=3 $).

In [1]:
import json
import re
import os
import pandas as pd
import numpy as np
from scipy import stats

## 1. Create a DataFrame with solver features
The solver features are as listed above. The index will be the file names.

In [2]:

# Pattern that pulls n (group 1) and k (group 2) out of names like "n10k3_v1.txt".
_NK_PATTERN = re.compile(r'n(\d+)k(\d+)')

# Counters copied from each of the first k log entries (suffixed with _1, _2, ...).
_PER_LOG_FIELDS = ('evts', 'expandEvts', 'pruneBacktrackEvts',
                   'backtrackEvts', 'strengthenEvts', 'maxStackDepth')

# Counters for which avg_/max_ aggregates over the first k log entries are stored.
_AGGREGATE_FIELDS = ('evts', 'expandEvts', 'pruneBacktrackEvts')


def _features_from_logs(filename, logs):
    """Build the feature dictionary for one instance from its non-empty log list."""
    match = _NK_PATTERN.search(filename)
    if match:
        k = int(match.group(2))  # Number of partitions, taken from the file name
    else:
        # Name does not follow the nXkY pattern: fall back to the number of
        # log entries, clamped to the range [3, 5].
        k = max(3, min(5, len(logs)))

    features = {
        'filename': filename,
        'num_stackdepth3_logs': 0,
    }

    # NOTE(review): when an instance has at most k log entries, the final
    # (TIMEOUT/TERMINATE) entry is also part of logs[:k], so its counters show
    # up both as per-log features and as final_expandEvts - confirm that this
    # overlap with the target variable is intended.
    first_logs = logs[:k]
    for position, log in enumerate(first_logs, start=1):
        if log.get('stackDepth', 0) == 3:
            features['num_stackdepth3_logs'] += 1
        for field in _PER_LOG_FIELDS:
            if field in log:
                features[f'{field}_{position}'] = log[field]

    # The last entry is treated as the termination/timeout record.
    last_log = logs[-1]
    last_event = last_log.get('event', '')
    is_timeout = last_event == 'TIMEOUT'
    is_terminated = last_event == 'TERMINATE'

    # Target variables
    features['censored'] = 1 if is_timeout else 0
    features['final_expandEvts'] = last_log.get('expandEvts', 0)
    features['final_maxStackDepth'] = last_log.get('maxStackDepth', 0)

    # Prefer the event-specific stop marker, otherwise fall back to 'iter'.
    if is_timeout and 'timeoutAt' in last_log:
        features['stop_iter'] = last_log['timeoutAt']
    elif is_terminated and 'terminateAt' in last_log:
        features['stop_iter'] = last_log['terminateAt']
    else:
        features['stop_iter'] = last_log.get('iter', 0)

    # Aggregates only consider entries that actually carry the field.
    for field in _AGGREGATE_FIELDS:
        values = [log[field] for log in first_logs if field in log]
        if values:
            features[f'avg_{field}'] = sum(values) / len(values)
            features[f'max_{field}'] = max(values)

    return features


def extract_ml_features(jsonl_path):
    """
    Extract ML features from the ml_features.jsonl file.

    Each non-blank line is parsed as a JSON object mapping a file name to a
    list of log entries. Lines that fail to parse or to process are reported
    on stdout and skipped; blank lines and instances with an empty log list
    are skipped silently.

    Args:
        jsonl_path: Path to the ml_features.jsonl file

    Returns:
        pandas.DataFrame: One row per instance, indexed by "filename". If no
        instance could be extracted, an empty DataFrame whose index is named
        "filename" is returned.
    """
    # List to store data for each instance
    data_list = []

    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue

            try:
                line_data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error parsing JSON line: {e}")
                continue

            try:
                # Each line contains a single key (filename) with an array of log entries
                for filename, logs in line_data.items():
                    if not logs:  # Skip if no logs
                        continue
                    data_list.append(_features_from_logs(filename, logs))
            except Exception as e:
                # Best-effort batch import: report the malformed record and move on.
                print(f"Error processing line: {e}")
                continue

    # Convert to DataFrame; set_index would raise KeyError on an empty frame.
    if data_list:
        df = pd.DataFrame(data_list).set_index("filename")
    else:
        df = pd.DataFrame(index=pd.Index([], name="filename"))

    print(f"Extracted features for {len(df)} instances")
    print(f"DataFrame shape: {df.shape}")

    return df


# Build the solver-feature table from the JSONL export; the returned DataFrame
# is indexed by instance file name.
solver_features_df = extract_ml_features("ml_features.jsonl")
# NOTE(review): disabled export. In the visible cells no variable named `df`
# is assigned until the merge step further down.
# df.to_csv("ml_features.csv", index=False)


Extracted features for 690 instances
DataFrame shape: (690, 41)


In [3]:
# Display the solver-feature table as the cell output.
solver_features_df

Unnamed: 0_level_0,num_stackdepth3_logs,evts_1,expandEvts_1,pruneBacktrackEvts_1,backtrackEvts_1,strengthenEvts_1,maxStackDepth_1,evts_2,expandEvts_2,pruneBacktrackEvts_2,...,pruneBacktrackEvts_4,backtrackEvts_4,strengthenEvts_4,maxStackDepth_4,evts_5,expandEvts_5,pruneBacktrackEvts_5,backtrackEvts_5,strengthenEvts_5,maxStackDepth_5
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
n10k3_v1.txt,3,4,4,0,0,0,3,60,30,25,...,,,,,,,,,,
n10k3_v2.txt,3,4,4,0,0,0,3,59,31,25,...,,,,,,,,,,
n10k3_v3.txt,3,4,4,0,0,0,3,66,34,27,...,,,,,,,,,,
n10k3_v4.txt,3,4,4,0,0,0,3,106,54,47,...,,,,,,,,,,
n10k3_v5.txt,3,4,4,0,0,0,3,21,11,8,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
n9k5_v1.txt,2,4,4,0,0,0,3,19,10,7,...,,,,,,,,,,
n9k5_v2.txt,5,4,4,0,0,0,3,27,15,10,...,29.0,1.0,1.0,10.0,65.0,34.0,29.0,1.0,1.0,10.0
n9k5_v3.txt,2,4,4,0,0,0,3,19,10,7,...,,,,,,,,,,
n9k5_v4.txt,2,4,4,0,0,0,3,19,10,7,...,,,,,,,,,,


## 2. Create a DataFrame with input features

In [4]:

# Column order of the returned input-feature table.
_INPUT_FEATURE_COLUMNS = (
    'n', 'k', 'total_sum', 'variance', 'skewness', 'max_num', 'min_num',
    'avg_subset_sum', 'max_to_avg_ratio', 'range_to_avg_ratio',
    'coef_of_variation',
)


def _compute_input_features(numbers, k):
    """Return the input-feature dictionary for one instance (numbers split into k parts)."""
    n = len(numbers)
    total_sum = sum(numbers)
    max_num = max(numbers) if numbers else 0
    min_num = min(numbers) if numbers else 0
    avg_subset_sum = total_sum / k if k > 0 else 0

    # Coefficient of variation (standardized measure of dispersion); an empty
    # list is treated as mean 0 so that numpy is never asked to average nothing.
    mean = np.mean(numbers) if numbers else 0
    std_dev = np.std(numbers) if numbers else 0

    # Ratios are infinite when there is no positive average subset sum to compare against.
    if avg_subset_sum > 0:
        max_to_avg_ratio = max_num / avg_subset_sum
        range_to_avg_ratio = (max_num - min_num) / avg_subset_sum
    else:
        max_to_avg_ratio = float('inf')
        range_to_avg_ratio = float('inf')

    return {
        'n': n,
        'k': k,
        'total_sum': total_sum,
        'variance': np.var(numbers) if n > 1 else 0,   # population variance
        'skewness': stats.skew(numbers) if n > 2 else 0,
        'max_num': max_num,
        'min_num': min_num,
        'avg_subset_sum': avg_subset_sum,
        # How close is the maximum number to the average subset sum?
        'max_to_avg_ratio': max_to_avg_ratio,
        'range_to_avg_ratio': range_to_avg_ratio,
        'coef_of_variation': std_dev / mean if mean > 0 else 0,
    }


def process_path_features(df, instance_dir):
    """
    Process input features from the original instance files.

    Every label of ``df.index`` is treated as a file name inside
    ``instance_dir``. The file layout read here is: line 1 an integer
    (solution value, -1 if no solution), line 2 the number of partitions k,
    and one integer per remaining line. Blank lines after line 2 are ignored.

    Files that do not exist are counted as missing; files that cannot be read
    or parsed are reported on stdout and also counted as missing. Their rows
    stay NaN in the result.

    Args:
        df: DataFrame with existing features from logs (only its index is used)
        instance_dir: Directory containing the original instance files

    Returns:
        pandas.DataFrame: New float-valued DataFrame with the input features,
        carrying the same index as ``df``; ``df`` itself is left unchanged.
        If no file could be processed the result has no columns.
    """
    # Track missing files
    missing_files = []
    processed_files = 0

    # Collected per file name; building the frame once at the end avoids
    # growing a DataFrame cell by cell.
    records = {}

    for filename in df.index:
        file_path = os.path.join(instance_dir, filename)

        try:
            with open(file_path, 'r') as f:
                lines = f.readlines()

            # The first line is parsed only to validate the file layout; the
            # value itself is not turned into a feature.
            int(lines[0].strip())
            k = int(lines[1].strip())         # number of partitions
            stripped = (line.strip() for line in lines[2:])
            # Skip blank lines so a trailing newline does not invalidate the file.
            numbers = [int(token) for token in stripped if token]

            records[filename] = _compute_input_features(numbers, k)
            processed_files += 1

        except FileNotFoundError:
            missing_files.append(filename)
        except Exception as e:
            # Best-effort batch: report the unreadable file and keep going.
            print(f"Error processing file {filename}: {e}")
            missing_files.append(filename)

    print(f"Processed {processed_files} instance files")
    print(f"Missing {len(missing_files)} files")

    if missing_files:
        print(f"First few missing files: {missing_files[:5]}")

    if not records:
        return pd.DataFrame(index=df.index)

    input_features = pd.DataFrame(
        list(records.values()),
        index=list(records.keys()),
        columns=list(_INPUT_FEATURE_COLUMNS),
    )
    # Align with the caller's index (NaN rows for missing files) and keep the
    # float dtype that downstream cells already rely on.
    return input_features.astype(float).reindex(df.index)



# Example usage:
# Compute input features for every instance listed in the solver-feature index.
instance_dir = "solver/numpart/instances/feature_collected"
input_features_df = process_path_features(solver_features_df, instance_dir)
# NOTE(review): the two disabled lines below call `extract_and_analyze_ml_features`,
# which is not defined in the visible cells - presumably left over from an earlier version.
# df = extract_and_analyze_ml_features("ml_features.jsonl", instance_dir)
# df.to_csv("ml_features_complete.csv", index=False)

# Display the input-feature table as the cell output.
input_features_df

Processed 690 instance files
Missing 0 files


Unnamed: 0_level_0,n,k,total_sum,variance,skewness,max_num,min_num,avg_subset_sum,max_to_avg_ratio,range_to_avg_ratio,coef_of_variation
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
n10k3_v1.txt,10.0,3.0,507.0,1060.810000,-0.047343,99.0,1.0,169.000000,0.585799,0.579882,0.642408
n10k3_v2.txt,10.0,3.0,496.0,683.040000,-0.125352,91.0,2.0,165.333333,0.550403,0.538306,0.526916
n10k3_v3.txt,10.0,3.0,512.0,777.560000,-0.007893,91.0,8.0,170.666667,0.533203,0.486328,0.544624
n10k3_v4.txt,10.0,3.0,505.0,658.250000,0.199096,94.0,9.0,168.333333,0.558416,0.504950,0.508047
n10k3_v5.txt,10.0,3.0,499.0,1497.490000,-0.182731,92.0,3.0,166.333333,0.553106,0.535070,0.775499
...,...,...,...,...,...,...,...,...,...,...,...
n9k5_v1.txt,9.0,5.0,449.0,1161.654321,0.116110,96.0,8.0,89.800000,1.069042,0.979955,0.683179
n9k5_v2.txt,9.0,5.0,527.0,580.024691,-0.392412,86.0,12.0,105.400000,0.815939,0.702087,0.411297
n9k5_v3.txt,9.0,5.0,454.0,1034.469136,0.112580,98.0,3.0,90.800000,1.079295,1.046256,0.637596
n9k5_v4.txt,9.0,5.0,383.0,938.691358,0.280482,91.0,3.0,76.600000,1.187990,1.148825,0.719955


In [5]:
def merge_features(solver_features_df, input_features_df):
    """
    Combine the solver-feature table with the input-feature table.

    Both frames are joined on their index with an inner join, so only index
    labels present in both survive. Column names that occur in both frames
    receive the suffix '_solver' (left side) or '_input' (right side).

    Args:
        solver_features_df: DataFrame with solver features
        input_features_df: DataFrame with input features

    Returns:
        pandas.DataFrame: The joined DataFrame, still carrying the shared index
    """
    combined = solver_features_df.join(
        input_features_df,
        how='inner',
        lsuffix='_solver',
        rsuffix='_input',
    )

    # Report the size of the joined table.
    row_count, column_count = combined.shape
    print(f"Merged features: {row_count} rows, {column_count} columns")

    return combined

# Join solver and input features on the shared file-name index.
df = merge_features(solver_features_df, input_features_df)

Merged features: 690 rows, 52 columns


## Extend DataFrame
Create another table with the following features:

- subset_sum_max
- subset_sum_min
- subset_sum_variance

These features apply only to instances that did not time out.

In [6]:
# Display the merged table as the cell output.
df

Unnamed: 0_level_0,num_stackdepth3_logs,evts_1,expandEvts_1,pruneBacktrackEvts_1,backtrackEvts_1,strengthenEvts_1,maxStackDepth_1,evts_2,expandEvts_2,pruneBacktrackEvts_2,...,k,total_sum,variance,skewness,max_num,min_num,avg_subset_sum,max_to_avg_ratio,range_to_avg_ratio,coef_of_variation
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
n10k3_v1.txt,3,4,4,0,0,0,3,60,30,25,...,3.0,507.0,1060.810000,-0.047343,99.0,1.0,169.000000,0.585799,0.579882,0.642408
n10k3_v2.txt,3,4,4,0,0,0,3,59,31,25,...,3.0,496.0,683.040000,-0.125352,91.0,2.0,165.333333,0.550403,0.538306,0.526916
n10k3_v3.txt,3,4,4,0,0,0,3,66,34,27,...,3.0,512.0,777.560000,-0.007893,91.0,8.0,170.666667,0.533203,0.486328,0.544624
n10k3_v4.txt,3,4,4,0,0,0,3,106,54,47,...,3.0,505.0,658.250000,0.199096,94.0,9.0,168.333333,0.558416,0.504950,0.508047
n10k3_v5.txt,3,4,4,0,0,0,3,21,11,8,...,3.0,499.0,1497.490000,-0.182731,92.0,3.0,166.333333,0.553106,0.535070,0.775499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
n9k5_v1.txt,2,4,4,0,0,0,3,19,10,7,...,5.0,449.0,1161.654321,0.116110,96.0,8.0,89.800000,1.069042,0.979955,0.683179
n9k5_v2.txt,5,4,4,0,0,0,3,27,15,10,...,5.0,527.0,580.024691,-0.392412,86.0,12.0,105.400000,0.815939,0.702087,0.411297
n9k5_v3.txt,2,4,4,0,0,0,3,19,10,7,...,5.0,454.0,1034.469136,0.112580,98.0,3.0,90.800000,1.079295,1.046256,0.637596
n9k5_v4.txt,2,4,4,0,0,0,3,19,10,7,...,5.0,383.0,938.691358,0.280482,91.0,3.0,76.600000,1.187990,1.148825,0.719955


In [7]:
def add_ratio_features(df):
    """
    Adds ratio features (expandEvts_i/evts_i, pruneBacktrackEvts_i/evts_i) to the DataFrame
    and imputes missing log features with zeros based on num_stackdepth3_logs and k.

    For each log slot i in 1..5, NaN values in the per-log columns are replaced
    by 0 on rows where k == 3 or where num_stackdepth3_logs < i. The ratios are
    then computed with a zero denominator treated as missing, and any undefined
    ratio is stored as 0. Log slots whose columns are absent from the frame are
    skipped, for both imputation and ratios.

    Parameters:
    - df (pandas.DataFrame): Input DataFrame with solver and input features.
      Must contain the columns 'k' and 'num_stackdepth3_logs'.

    Returns:
    - pandas.DataFrame: Updated copy with ratio features and imputed missing logs;
      the input DataFrame is not modified.
    """
    log_features = ('evts', 'expandEvts', 'pruneBacktrackEvts',
                    'backtrackEvts', 'strengthenEvts', 'maxStackDepth')
    ratio_features = ('expandEvts', 'pruneBacktrackEvts')

    # Create a copy to avoid modifying the input DataFrame
    df = df.copy()

    # Neither column is touched by the imputation, so the k == 3 mask can be built once.
    is_k3 = df['k'] == 3
    depth3_count = df['num_stackdepth3_logs']

    # Impute missing logs with 0
    for i in range(1, 6):
        # Rows with k == 3, plus rows that recorded fewer than i stackDepth=3 logs.
        fill_rows = is_k3 | (depth3_count < i)
        for feature in log_features:
            col = f'{feature}_{i}'
            if col in df:
                df.loc[fill_rows, col] = df.loc[fill_rows, col].fillna(0)

    # Add ratio features
    for i in range(1, 6):
        evts_col = f'evts_{i}'
        if evts_col not in df:
            # No data for this log slot at all (e.g. no instance has that many logs).
            continue
        # A zero event count would divide by zero; treat it as missing instead.
        denominator = df[evts_col].replace(0, np.nan)
        for feature in ratio_features:
            source_col = f'{feature}_{i}'
            if source_col in df:
                # Impute NaN ratios with 0
                df[f'{feature}_ratio_{i}'] = (df[source_col] / denominator).fillna(0)

    return df

# Replace df with the copy that carries imputed log columns and ratio features.
df = add_ratio_features(df)

In [8]:
# Display the table with ratio features as the cell output.
df


Unnamed: 0_level_0,num_stackdepth3_logs,evts_1,expandEvts_1,pruneBacktrackEvts_1,backtrackEvts_1,strengthenEvts_1,maxStackDepth_1,evts_2,expandEvts_2,pruneBacktrackEvts_2,...,expandEvts_ratio_1,pruneBacktrackEvts_ratio_1,expandEvts_ratio_2,pruneBacktrackEvts_ratio_2,expandEvts_ratio_3,pruneBacktrackEvts_ratio_3,expandEvts_ratio_4,pruneBacktrackEvts_ratio_4,expandEvts_ratio_5,pruneBacktrackEvts_ratio_5
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
n10k3_v1.txt,3,4,4,0,0,0,3,60,30,25,...,1.0,0.0,0.500000,0.416667,0.516129,0.403226,0.000000,0.000000,0.000000,0.000000
n10k3_v2.txt,3,4,4,0,0,0,3,59,31,25,...,1.0,0.0,0.525424,0.423729,0.516854,0.449438,0.000000,0.000000,0.000000,0.000000
n10k3_v3.txt,3,4,4,0,0,0,3,66,34,27,...,1.0,0.0,0.515152,0.409091,0.509434,0.443396,0.000000,0.000000,0.000000,0.000000
n10k3_v4.txt,3,4,4,0,0,0,3,106,54,47,...,1.0,0.0,0.509434,0.443396,0.500000,0.464789,0.000000,0.000000,0.000000,0.000000
n10k3_v5.txt,3,4,4,0,0,0,3,21,11,8,...,1.0,0.0,0.523810,0.380952,0.565217,0.347826,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
n9k5_v1.txt,2,4,4,0,0,0,3,19,10,7,...,1.0,0.0,0.526316,0.368421,0.454545,0.409091,0.000000,0.000000,0.000000,0.000000
n9k5_v2.txt,5,4,4,0,0,0,3,27,15,10,...,1.0,0.0,0.555556,0.370370,0.533333,0.422222,0.507937,0.460317,0.523077,0.446154
n9k5_v3.txt,2,4,4,0,0,0,3,19,10,7,...,1.0,0.0,0.526316,0.368421,0.454545,0.409091,0.000000,0.000000,0.000000,0.000000
n9k5_v4.txt,2,4,4,0,0,0,3,19,10,7,...,1.0,0.0,0.526316,0.368421,0.454545,0.409091,0.000000,0.000000,0.000000,0.000000


In [10]:
def preprocess_and_save_dataframe(df, output_excel_path, include_ratios=False):
    """
    Derive log-difference columns, zero-fill missing per-log columns and write
    the result to an Excel file.

    Steps, performed on a copy of ``df``:
      1. For i in 2..5 and each of evts / expandEvts / pruneBacktrackEvts, add
         ``diff_<feature>_<i>`` = column i minus column i-1 (only when both
         columns exist); undefined differences become 0.
      2. For i in 1..5, replace NaN by 0 in the per-log columns on rows where
         k == 3 and on rows where num_stackdepth3_logs < i.
      3. Save with ``DataFrame.to_excel(..., index=False)``, i.e. without the
         index.

    Any exception raised along the way (including the type check on ``df``) is
    printed and turns the return value into None.

    Parameters:
    - df (pandas.DataFrame): Input DataFrame with solver features and final_expandEvts.
    - output_excel_path (str): Path to save the modified DataFrame as an Excel file.
    - include_ratios (bool): Accepted for interface compatibility; the body
      never reads it, so it currently has no effect (default: False).

    Returns:
    - pandas.DataFrame: The preprocessed copy, or None if an error occurred.
    """
    diff_features = ('evts', 'expandEvts', 'pruneBacktrackEvts')
    log_features = diff_features + ('backtrackEvts', 'strengthenEvts', 'maxStackDepth')

    try:
        # Reject anything that is not a DataFrame before touching it.
        if not isinstance(df, pd.DataFrame):
            raise TypeError(f"Input 'df' must be a pandas DataFrame, got {type(df)}")
        print(f"Input DataFrame type: {type(df)}")
        print(f"Input columns: {df.columns.tolist()}")

        # All changes go to a private copy; the caller's frame stays as it was.
        frame = df.copy()

        print("Adding log difference features...")
        for idx in range(2, 6):
            for name in diff_features:
                current = f'{name}_{idx}'
                previous = f'{name}_{idx-1}'
                if current not in frame or previous not in frame:
                    continue
                delta = frame[current] - frame[previous]
                frame[f'diff_{name}_{idx}'] = delta.fillna(0)
        print(f"Columns after log difference features: {frame.columns.tolist()}")

        print("Imputing missing log features...")
        for idx in range(1, 6):
            for name in log_features:
                column = f'{name}_{idx}'
                if column not in frame:
                    continue
                # Same two zero-fill rules, applied one after the other.
                row_masks = (frame['k'] == 3, frame['num_stackdepth3_logs'] < idx)
                for rows in row_masks:
                    frame.loc[rows, column] = frame.loc[rows, column].fillna(0)
        print(f"Columns after imputation: {frame.columns.tolist()}")

        print(f"Saving preprocessed DataFrame to {output_excel_path}...")
        frame.to_excel(output_excel_path, index=False)
        print(f"Preprocessed DataFrame saved successfully to {output_excel_path}")

        return frame

    except Exception as e:
        print(f"Error in preprocess_and_save_dataframe: {e}")
        return None

# Run the final preprocessing step and write structured_data.xlsx. The return
# value is not assigned, and the function works on a copy, so `df` keeps its
# current columns after this call.
preprocess_and_save_dataframe(df, "structured_data.xlsx")

Input DataFrame type: <class 'pandas.core.frame.DataFrame'>
Input columns: ['num_stackdepth3_logs', 'evts_1', 'expandEvts_1', 'pruneBacktrackEvts_1', 'backtrackEvts_1', 'strengthenEvts_1', 'maxStackDepth_1', 'evts_2', 'expandEvts_2', 'pruneBacktrackEvts_2', 'backtrackEvts_2', 'strengthenEvts_2', 'maxStackDepth_2', 'evts_3', 'expandEvts_3', 'pruneBacktrackEvts_3', 'backtrackEvts_3', 'strengthenEvts_3', 'maxStackDepth_3', 'censored', 'final_expandEvts', 'final_maxStackDepth', 'stop_iter', 'avg_evts', 'max_evts', 'avg_expandEvts', 'max_expandEvts', 'avg_pruneBacktrackEvts', 'max_pruneBacktrackEvts', 'evts_4', 'expandEvts_4', 'pruneBacktrackEvts_4', 'backtrackEvts_4', 'strengthenEvts_4', 'maxStackDepth_4', 'evts_5', 'expandEvts_5', 'pruneBacktrackEvts_5', 'backtrackEvts_5', 'strengthenEvts_5', 'maxStackDepth_5', 'n', 'k', 'total_sum', 'variance', 'skewness', 'max_num', 'min_num', 'avg_subset_sum', 'max_to_avg_ratio', 'range_to_avg_ratio', 'coef_of_variation', 'expandEvts_ratio_1', 'pruneBa

Unnamed: 0_level_0,num_stackdepth3_logs,evts_1,expandEvts_1,pruneBacktrackEvts_1,backtrackEvts_1,strengthenEvts_1,maxStackDepth_1,evts_2,expandEvts_2,pruneBacktrackEvts_2,...,diff_pruneBacktrackEvts_2,diff_evts_3,diff_expandEvts_3,diff_pruneBacktrackEvts_3,diff_evts_4,diff_expandEvts_4,diff_pruneBacktrackEvts_4,diff_evts_5,diff_expandEvts_5,diff_pruneBacktrackEvts_5
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
n10k3_v1.txt,3,4,4,0,0,0,3,60,30,25,...,25,2.0,2.0,0.0,-62.0,-32.0,-25.0,0.0,0.0,0.0
n10k3_v2.txt,3,4,4,0,0,0,3,59,31,25,...,25,30.0,15.0,15.0,-89.0,-46.0,-40.0,0.0,0.0,0.0
n10k3_v3.txt,3,4,4,0,0,0,3,66,34,27,...,27,40.0,20.0,20.0,-106.0,-54.0,-47.0,0.0,0.0,0.0
n10k3_v4.txt,3,4,4,0,0,0,3,106,54,47,...,47,36.0,17.0,19.0,-142.0,-71.0,-66.0,0.0,0.0,0.0
n10k3_v5.txt,3,4,4,0,0,0,3,21,11,8,...,8,2.0,2.0,0.0,-23.0,-13.0,-8.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
n9k5_v1.txt,2,4,4,0,0,0,3,19,10,7,...,7,3.0,0.0,2.0,-22.0,-10.0,-9.0,0.0,0.0,0.0
n9k5_v2.txt,5,4,4,0,0,0,3,27,15,10,...,10,18.0,9.0,9.0,18.0,8.0,10.0,2.0,2.0,0.0
n9k5_v3.txt,2,4,4,0,0,0,3,19,10,7,...,7,3.0,0.0,2.0,-22.0,-10.0,-9.0,0.0,0.0,0.0
n9k5_v4.txt,2,4,4,0,0,0,3,19,10,7,...,7,3.0,0.0,2.0,-22.0,-10.0,-9.0,0.0,0.0,0.0
