### Imports

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

### Instance Data Merger

In [None]:
# Root of the experiment results tree. The merger cell walks
# <root_dir>/<folder>/<sub_folder>/ looking for per-instance CSV files.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
root_dir = "/home/nvombat/Desktop/z3r0_7ru57/research/experiments/hpc/experiment3"
# Generated CSV files are written back into the same directory as the inputs.
output_dir = root_dir

In [None]:
# Merge every per-instance CSV found under
# <root_dir>/<folder>/<sub_folder>/ into one table per encoding.
# Files are routed by name: "experiment_data_e1" -> E1, "experiment_data_e2" -> E2.

# Collect the individual frames first and concatenate once at the end;
# calling pd.concat inside the loop re-copies the accumulated data every time.
e1_frames = []
e2_frames = []

for folder in sorted(os.listdir(root_dir)):
    folder_path = os.path.join(root_dir, folder)

    # root_dir may contain plain files (e.g. the concatenated_*.csv outputs
    # of a previous run when output_dir == root_dir); listing them would
    # raise NotADirectoryError, so skip anything that is not a directory.
    if not os.path.isdir(folder_path):
        continue

    for sub_folder in sorted(os.listdir(folder_path)):
        sub_folder_path = os.path.join(folder_path, sub_folder)
        if not os.path.isdir(sub_folder_path):
            continue

        # Sorted so that the row order of the merged tables is reproducible
        # (os.listdir returns entries in arbitrary order).
        for file in sorted(os.listdir(sub_folder_path)):
            if not file.endswith(".csv"):
                continue

            file_path = os.path.join(sub_folder_path, file)
            if "experiment_data_e1" in file:
                e1_frames.append(pd.read_csv(file_path))
            elif "experiment_data_e2" in file:
                e2_frames.append(pd.read_csv(file_path))

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no matching files were found.
e1_combined = pd.concat(e1_frames, ignore_index=True) if e1_frames else pd.DataFrame()
e2_combined = pd.concat(e2_frames, ignore_index=True) if e2_frames else pd.DataFrame()

# Save the concatenated DataFrames
os.makedirs(output_dir, exist_ok=True)
e1_combined.to_csv(os.path.join(output_dir, 'concatenated_e1.csv'), index=False)
e2_combined.to_csv(os.path.join(output_dir, 'concatenated_e2.csv'), index=False)

print("Concatenation Complete. Files Saved To: ", output_dir)

### Instance Data Analyzer & Builder

##### Mean, Median and Standard Deviation of Merged Instance Data

In [None]:
# Reload the merged per-encoding tables from disk.
# They are read from output_dir because that is where the merger cell writes
# concatenated_e1.csv / concatenated_e2.csv; reading from root_dir only works
# while the two directories happen to be the same.
e1_concat_path = os.path.join(output_dir, "concatenated_e1.csv")
e2_concat_path = os.path.join(output_dir, "concatenated_e2.csv")

e1_data = pd.read_csv(e1_concat_path)
e2_data = pd.read_csv(e2_concat_path)

In [None]:
def calculate_statistics(df: pd.DataFrame, encoding_name: str):
    """
    Summarise the size metrics of one encoding for every value of 'N'.

    Args:
        df: Instance data for a single encoding; must contain the columns
            'N', 'num_clauses', 'num_variables' and 'num_literals'.
        encoding_name: Label written into the 'encoding' column ('E1', 'E2', etc.)

    Returns:
        DataFrame: One row per distinct 'N' with the columns 'encoding', 'N'
        and '<metric>_mean', '<metric>_median', '<metric>_std' for each metric.
    """
    metrics = ['num_clauses', 'num_variables', 'num_literals']
    aggregations = ['mean', 'median', 'std']

    # Aggregate per group, then turn the 'N' index back into a regular column
    summary = df.groupby('N')[metrics].agg(aggregations)
    summary = summary.reset_index()

    # Collapse the (metric, aggregation) column pairs into single names;
    # the 'N' column has an empty second level and keeps its plain name
    flat_names = []
    for top, sub in summary.columns:
        if sub:
            flat_names.append('_'.join((top, sub)).strip('_'))
        else:
            flat_names.append(top)
    summary.columns = flat_names

    # The encoding label goes in front of everything else
    summary.insert(0, 'encoding', encoding_name)

    return summary


# Per-N statistics for each encoding, labelled so the rows stay
# distinguishable once both tables are stacked
e1_stats, e2_stats = (
    calculate_statistics(frame, label)
    for frame, label in ((e1_data, "E1"), (e2_data, "E2"))
)

# Stack both encodings into one table and write it next to the merged data
combined_stats = pd.concat([e1_stats, e2_stats], ignore_index=True)
stats_output_file = os.path.join(output_dir, 'encoding_statistics.csv')
combined_stats.to_csv(stats_output_file, index=False)

print("Statistics Calculated and Saved To: ", stats_output_file)

### Statistics Calculator

#### Timed Out Instances

In [None]:
# Count the rows whose status is 'TMO' in each encoding;
# 0 is reported when the status never occurs
status_counts_e1 = e1_data['status'].value_counts()
status_counts_e2 = e2_data['status'].value_counts()

tmo_count_e1 = status_counts_e1.get('TMO', 0)
tmo_count_e2 = status_counts_e2.get('TMO', 0)

print(f"Count of TMO [E1]: {tmo_count_e1}")
print(f"Count of TMO [E2]: {tmo_count_e2}")

#### Total Solving Time

In [None]:
# Only rows with status 'SLV' contribute to the total solving time
is_slv_e1 = e1_data['status'] == 'SLV'
is_slv_e2 = e2_data['status'] == 'SLV'

total_solving_time_e1 = e1_data.loc[is_slv_e1, 'solving_time'].sum()
total_solving_time_e2 = e2_data.loc[is_slv_e2, 'solving_time'].sum()

print(f"Total solving time for SLV instances [E1]: {total_solving_time_e1}")
print(f"Total solving time for SLV instances [E2]: {total_solving_time_e2}")

#### Calculate Mean, Median and Standard Deviation for Encoding 1 (All Instances)

In [None]:
# Overall (not grouped by N) summary of the size metrics for encoding 1:
# one row per metric, one column per statistic
columns_to_analyze = ['num_clauses', 'num_variables', 'num_literals']

statistics_e1 = {
    'Metric': list(columns_to_analyze),
    'Mean': [e1_data[name].mean() for name in columns_to_analyze],
    'Median': [e1_data[name].median() for name in columns_to_analyze],
    'Standard Deviation': [e1_data[name].std() for name in columns_to_analyze],
}

stats_e1 = pd.DataFrame(statistics_e1)

print(stats_e1)

#### Calculate Mean, Median and Standard Deviation for Encoding 2 (All Instances)

In [None]:
# Overall (not grouped by N) summary of the size metrics for encoding 2:
# one row per metric, one column per statistic
columns_to_analyze = ['num_clauses', 'num_variables', 'num_literals']

statistics_e2 = {
    'Metric': list(columns_to_analyze),
    'Mean': [e2_data[name].mean() for name in columns_to_analyze],
    'Median': [e2_data[name].median() for name in columns_to_analyze],
    'Standard Deviation': [e2_data[name].std() for name in columns_to_analyze],
}

stats_e2 = pd.DataFrame(statistics_e2)

print(stats_e2)