In [3]:
import pandas as pd

file_path = r"C:\Users\wikto\PycharmProjects\ce-robustness\framework\experiments\thesis\raw_results\diabetes_lof13.csv"
pd.set_option('display.max_columns', None)
df = pd.read_csv(file_path)

unique_lof_values = df['base_counterfactual_lof'].unique()
print("Unique values in 'LOF' column:")
print(unique_lof_values)

Unique values in 'LOF' column:
['[1]' '[-1]']


In [4]:
lof_counts = df['base_counterfactual_lof'].value_counts(dropna=False)
print("Counts of unique values in 'base_counterfactual_lof' column:")
print(lof_counts)

Counts of unique values in 'base_counterfactual_lof' column:
base_counterfactual_lof
[1]     25750
[-1]     6650
Name: count, dtype: int64


In [5]:
import pandas as pd

file_path = r"C:\Users\wikto\PycharmProjects\ce-robustness\framework\experiments\thesis\raw_results\diabetes_lof13.csv"
df = pd.read_csv(file_path)

def clean_lof(val):
    if pd.isna(val):
        return None
    val = str(val).strip('[]')  
    if val == '1':
        return 1
    elif val == '-1':
        return 0
    else:
        return None

df['cleaned_lof'] = df['base_counterfactual_lof'].apply(clean_lof)

mean_lof = df['cleaned_lof'].mean()
print(f"Mean LOF (with -1 as 0): {mean_lof}")

Mean LOF (with -1 as 0): 0.7947530864197531


In [6]:
import pandas as pd
import os

path = r"C:\Users\wikto\PycharmProjects\ce-robustness\framework\experiments\thesis\raw_results"

files = [
    "rice_lof13.csv",
    "heloc_lof13.csv",
    "wine_quality_lof13.csv",
    "diabetes_lof13.csv",
    "car_eval_lof13.csv",
]

for file in files:
    full_path = os.path.join(path, file)

    df = pd.read_csv(full_path)

    df_cleaned = df[df['base_counterfactual'].notna() & (df['base_counterfactual'] != "None")]

    name, ext = os.path.splitext(file)
    new_file = f"{name}_cleaned{ext}"
    new_path = os.path.join(path, new_file)

    df_cleaned.to_csv(new_path, index=False)

    print(f"Saved cleaned file: {new_file} ({len(df) - len(df_cleaned)} rows removed)")

Saved cleaned file: rice_lof13_cleaned.csv (0 rows removed)
Saved cleaned file: heloc_lof13_cleaned.csv (0 rows removed)
Saved cleaned file: wine_quality_lof13_cleaned.csv (0 rows removed)
Saved cleaned file: diabetes_lof13_cleaned.csv (0 rows removed)
Saved cleaned file: car_eval_lof13_cleaned.csv (0 rows removed)


In [7]:
import pandas as pd

# Load the CSV file
file_path = r"C:\Users\wikto\PycharmProjects\ce-robustness\framework\experiments\thesis\cleaned_results\diabetes_lof13_cleaned.csv"
df = pd.read_csv(file_path)

col = "base_counterfactual"

if col in df.columns:
    null_count = df[col].isna().sum()
    print(f"\nNumber of NaN/None/null values: {null_count}")
else:
    print(f"'{col}' column not found in the CSV.")


Number of NaN/None/null values: 0


In [9]:
import pandas as pd
import ast
import numpy as np

path = r"C:\Users\wikto\PycharmProjects\ce-robustness\framework\experiments\thesis\acc_change.csv"

df = pd.read_csv(path)

df['acc_m2_list'] = df['acc_m2'].apply(lambda x: ast.literal_eval(x))

df['acc_m2_mean'] = df['acc_m2_list'].apply(np.mean)
df['acc_m2_mean'] = df['acc_m2_mean'].round(3)

df['acc_m2_std'] = df['acc_m2_list'].apply(np.std)  
df['acc_m2_std'] = df['acc_m2_std'].round(3)

df = df.drop(columns=['acc_m2_list'])

result = (
    df.groupby(['dataset_name', 'exp_type'])
      .agg(
          mean_acc_m1_mean=('acc_m1', 'mean'),
          mean_acc_m2_mean=('acc_m2_mean', 'mean'),
          mean_acc_m2_std=('acc_m2_std', 'mean')
      )
      .reset_index()
)

result = result.round(3)

result

Unnamed: 0,dataset_name,exp_type,mean_acc_m1_mean,mean_acc_m2_mean,mean_acc_m2_std
0,car_eval,Architecture,0.939,0.939,0.021
1,car_eval,Bootstrap,0.946,0.947,0.012
2,car_eval,Weights,0.939,0.87,0.02
3,diabetes,Architecture,0.699,0.696,0.033
4,diabetes,Bootstrap,0.749,0.746,0.019
5,diabetes,Weights,0.709,0.699,0.013
6,heloc,Architecture,0.717,0.714,0.015
7,heloc,Bootstrap,0.724,0.722,0.011
8,heloc,Weights,0.72,0.699,0.019
9,rice,Architecture,0.923,0.924,0.003


In [10]:
result = result.sort_values(['exp_type', 'dataset_name'])

latex_str = "\\begin{tabular}{llrrr}\n\\toprule\nExp Type & Dataset & Mean Acc M1 & Mean Acc M2 & Mean Std M2 \\\\\n\\midrule\n"

current_exp = None
for _, row in result.iterrows():
    if row['exp_type'] != current_exp:
        count = sum(result['exp_type'] == row['exp_type'])
        latex_str += f"\\multirow{{{count}}}{{*}}{{{row['exp_type']}}} & {row['dataset_name']} & {row['mean_acc_m1_mean']:.3f} & {row['mean_acc_m2_mean']:.3f} & {row['mean_acc_m2_std']:.3f} \\\\\n"
        current_exp = row['exp_type']
    else:
        latex_str += f"& {row['dataset_name']} & {row['mean_acc_m1_mean']:.3f} & {row['mean_acc_m2_mean']:.3f} & {row['mean_acc_m2_std']:.3f} \\\\\n"

latex_str += "\\bottomrule\n\\end{tabular}"

print(latex_str)

\begin{tabular}{llrrr}
\toprule
Exp Type & Dataset & Mean Acc M1 & Mean Acc M2 & Mean Std M2 \\
\midrule
\multirow{5}{*}{Architecture} & car_eval & 0.939 & 0.939 & 0.021 \\
& diabetes & 0.699 & 0.696 & 0.033 \\
& heloc & 0.717 & 0.714 & 0.015 \\
& rice & 0.923 & 0.924 & 0.003 \\
& wine_quality & 0.741 & 0.742 & 0.008 \\
\multirow{5}{*}{Bootstrap} & car_eval & 0.946 & 0.947 & 0.012 \\
& diabetes & 0.749 & 0.746 & 0.019 \\
& heloc & 0.724 & 0.722 & 0.011 \\
& rice & 0.926 & 0.926 & 0.003 \\
& wine_quality & 0.751 & 0.752 & 0.006 \\
\multirow{5}{*}{Weights} & car_eval & 0.939 & 0.870 & 0.020 \\
& diabetes & 0.709 & 0.699 & 0.013 \\
& heloc & 0.720 & 0.699 & 0.019 \\
& rice & 0.925 & 0.894 & 0.023 \\
& wine_quality & 0.746 & 0.726 & 0.014 \\
\bottomrule
\end{tabular}


In [11]:
import pandas as pd

file_path = r"C:\Users\wikto\PycharmProjects\ce-robustness\framework\data\wine_quality.csv"

df = pd.read_csv(file_path)

print(f"Instances (rows): {df.shape[0]}")
print(f"Attributes (columns): {df.shape[1]}")

print("\nColumn Data Types:")
print(df.dtypes)

print("\nAttribute Types:")
for col in df.columns:
    if pd.api.types.is_numeric_dtype(df[col]):
        print(f"{col}: Numerical")
    else:
        print(f"{col}: Categorical / Non-numeric")

Instances (rows): 6497
Attributes (columns): 12

Column Data Types:
fixed_acidity           float64
volatile_acidity        float64
citric_acid             float64
residual_sugar          float64
chlorides               float64
free_sulfur_dioxide     float64
total_sulfur_dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                    bool
dtype: object

Attribute Types:
fixed_acidity: Numerical
volatile_acidity: Numerical
citric_acid: Numerical
residual_sugar: Numerical
chlorides: Numerical
free_sulfur_dioxide: Numerical
total_sulfur_dioxide: Numerical
density: Numerical
pH: Numerical
sulphates: Numerical
alcohol: Numerical
quality: Numerical
