***Copyright 2024 by the authors' university which is unnamed because of KDD's anonymity requirement.***

## **Connect to Drive**

In [None]:
# Mount Google Drive inside the Colab VM so the project folder can be used as a normal path.
# (`files` is imported here but not used in this cell.)
from google.colab import drive
from google.colab import files

drive.mount('/content/gdrive/')
print("-"*80)

# List the project folder as a quick visual check that the mount worked.
!ls "/content/gdrive/My Drive/Colab Notebooks/Research/CFE"

# Make the project folder the working directory and echo it.
import os
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Research/CFE")
print("Current dir: ", os.getcwd())

## **Imports**

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings

from sklearn.svm import LinearSVC
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, precision_recall_curve, roc_curve, auc
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

# Suppress warnings
warnings.filterwarnings("ignore")

## **Linear SVM**

In [None]:
df = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/CFE_datasets/Sleep_health_and_lifestyle_dataset.csv')

# Binary target: 1 = Sleep Apnea, 0 = everything else (no disorder or Insomnia).
# pandas >= 2.0 parses the literal text "None" as a missing value by default, so the
# no-disorder rows would arrive as NaN, map to NaN, and then be silently removed by the
# dropna() below. Restoring the label first keeps those rows on every pandas version.
# NOTE(review): a genuinely empty 'Sleep Disorder' cell is now also treated as "None".
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None').map({'None': 0, 'Insomnia': 0, 'Sleep Apnea': 1})

df = df.drop(columns=['Person ID'])
df = df.dropna()
df['Gender'] = df['Gender'].map({'Female': 1, 'Male': 0})

# OrdinalEncoder assigns integer codes starting at 0. The same encoder object is refit
# for each column, so after this cell `encoder` only remembers the BMI Category classes.
# NOTE(review): the Z3 steps below only allow codes 1..10 (Occupation) and 1..3
# (BMI Category), i.e. code 0 is never a valid counterfactual value - confirm intended.
encoder = OrdinalEncoder()
df['Occupation'] = encoder.fit_transform(df[['Occupation']])
df['BMI Category'] = encoder.fit_transform(df[['BMI Category']])

# Feature matrix = every column except the target.
X = df.loc[:, df.columns != 'Sleep Disorder']
y = df['Sleep Disorder']

In [None]:
# Hold out 30% of the records; the grid search below only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Candidate values for the LinearSVC regularisation parameter C.
params = {'C': [1, 5, 10, 25, 50, 100]}

model = LinearSVC(random_state=42)
cv = StratifiedKFold(n_splits=10)

# 10-fold stratified search, ranked by macro-averaged F1.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring='f1_macro',
    cv=cv,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# `rf_best` is the winning LinearSVC (the name is historical).
rf_best = grid_search.best_estimator_

print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)
print("Best Estimator:", rf_best)

Best Parameters: {'C': 1}
Best Score: 0.39684074790457763
Best Estimator: LinearSVC(C=1, random_state=42)


In [None]:
# Refit the configuration reported by the grid search above (C=1) on the full dataset.
best = LinearSVC(C=1, random_state=42)
best.fit(X, y)

# 10-fold stratified cross-validation, scored with macro-averaged F1.
cv = StratifiedKFold(n_splits=10)
cv_scores = cross_val_score(best, X, y, cv=cv, scoring='f1_macro')

print("Cross-Validation Scores:", cv_scores)
# The scorer is 'f1_macro', so the mean is a macro-F1 value, not an accuracy.
print("Mean CV F1 (macro):", cv_scores.mean())
train_predictions = best.predict(X)

# Accuracy on the very data the model was fitted on (not a held-out estimate).
train_accuracy = accuracy_score(y, train_predictions)
print("Training Set Accuracy:", train_accuracy)

Cross-Validation Scores: [0.44117647 0.44117647 0.44117647 0.65454545 0.44776119 0.44776119
 0.43939394 0.43939394 0.421875   0.43939394]
Mean CV Accuracy: 0.46136540725516806
Training Set Accuracy: 0.22727272727272727


## **Extracting weights and intercept values**

In [None]:
# Hyperplane parameters of the fitted LinearSVC: `w` is the coefficient matrix flattened
# to one dimension, `b` is the intercept array (later cells read b[0]).
w = best.coef_.flatten()
b = best.intercept_

## **Setting the original misclassified *x* feature vector**

In [None]:
# Score every record with the fitted model.
og_x = df.drop(columns=['Sleep Disorder'])
predicted_label = best.predict(og_x)

# Positions (0-based, in prediction order) of the records predicted as class 1.
results_df = pd.DataFrame({'Sleep_Disorder_Predicted': predicted_label})
is_predicted_positive = results_df['Sleep_Disorder_Predicted'] == 1
index_class = results_df.index[is_predicted_positive]

# Re-index the features 0..n-1 so those positions can be used as labels,
# then keep only the predicted-positive records for the counterfactual search.
og_x_reset = og_x.reset_index(drop=True)
filtered_records = og_x_reset.loc[index_class]
print(len(filtered_records))

365


## **Z3 implementation**

In [None]:
!pip install z3
!pip install z3-solver

Collecting z3
  Downloading z3-0.2.0.tar.gz (24 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting boto (from z3)
  Downloading boto-2.49.0-py2.py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: z3
  Building wheel for z3 (setup.py) ... [?25l[?25hdone
  Created wheel for z3: filename=z3-0.2.0-py3-none-any.whl size=26629 sha256=2e63b4a6e192ca1ef2adf1af36f1a432a07901918cff8487e7fc720e16d3fedf
  Stored in directory: /root/.cache/pip/wheels/5a/b2/60/55b07a5084cad7ab411e395fb1440a2b1a19598bff535a3955
Successfully built z3
Installing collected packages: boto, z3
Successfully installed boto-2.49.0 z3-0.2.0
Collecting z3-solver
  Downloading z3_solver-4.12.4.0-py2.py3-none-manylinux2014_x86_64.whl (56.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.7/56.7 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstal

## **Optimizer -- step 1**

In [None]:
from z3 import *
import csv
import time

def abs(x):
    """Build a Z3 if-then-else term equal to the absolute value of ``x``.

    Defined at notebook level under the name ``abs``, so it replaces Python's
    built-in ``abs`` for every cell executed after this one.

    Args:
        x: value supporting ``x < 0`` and ``-x``; ``step1`` passes the difference
            between an original feature value and a Z3 ``Real`` variable.

    Returns:
        The term ``If(x < 0, -x, x)``.
    """
    return If(x < 0, -x, x)

def step1(W, B, og_x_np):
    """Find the L1-closest feature vector that the linear model scores below zero.

    A Z3 ``Optimize`` instance gets one ``Real`` variable per feature, the fixed
    domain constraints listed below, and the requirement ``W . x' + B < 0``.
    The objective minimised is the sum of absolute differences to ``og_x_np``.

    Args:
        W: 1-D weight vector; ``W.shape[0]`` is the number of features.
        B: scalar intercept.
        og_x_np: original feature values, indexable by position.

    Returns:
        On success, the tuple ``(original_f, x_prime_optimized, l1_distance_1,
        cost_optimized)``; the solution and both objective values are decimal
        strings from ``as_decimal(15)``, and the two objective values are the same.
        On failure, the list ``[original_f, '', '', '']``.
    """
    n = W.shape[0]
    opt = Optimize()
    X = [Real(f'x_{i}') for i in range(n)]

    def one_of(var, low, high):
        # var equals one of the integers low..high (inclusive).
        return Or(*[var == value for value in range(low, high + 1)])

    def within(var, low, high):
        # low <= var <= high
        return And(var >= low, var <= high)

    # Gender (0) and age (1) must keep their original values.
    for fixed_idx in (0, 1):
        fixed_value = og_x_np[fixed_idx]
        if fixed_value is not None:
            opt.add(X[fixed_idx] == fixed_value)

    # NOTE(review): the notebook encodes Occupation / BMI Category with OrdinalEncoder,
    # whose codes start at 0; code 0 is excluded by the two domains below - confirm.
    opt.add(one_of(X[2], 1, 10))          # occupation
    opt.add(within(X[3], 6, 7))           # sleep
    opt.add(one_of(X[4], 1, 10))          # sleep quality
    opt.add(within(X[5], 60, 90))         # physical activity
    opt.add(X[6] <= og_x_np[6])           # stress level may not increase
    opt.add(one_of(X[7], 1, 3))           # feature 7 (labelled "bmi category" in step2/step3)
    opt.add(within(X[8], 100, 140))       # blood pressure high
    opt.add(within(X[9], 70, 90))         # blood pressure low
    opt.add(within(X[10], 60, 80))        # feature 10 (labelled "heart rate" in step2/step3)
    opt.add(within(X[11], 3000, 5000))    # steps

    # The counterfactual must lie strictly on the negative side of the hyperplane.
    score = Sum([W[i] * X[i] for i in range(n)])
    opt.add(score + B < 0)

    # L1 distance to the original record; `abs` is the notebook-level Z3 helper.
    objective = Sum([abs(og_x_np[i] - X[i]) for i in range(n)])
    opt.minimize(objective)

    original_f = [og_x_np[i] for i in range(n)]

    if opt.check() != sat:
        print("````````````FAILED TO FIND AN OPTIMAL SOLUTION.`````````````````````")
        return [original_f, '', '', '']

    model = opt.model()

    x_prime_optimized = [model[var].as_decimal(15) for var in X]
    print(f"Optimized x': {x_prime_optimized}")

    # Distance and cost are the same objective value in this step.
    l1_distance_1 = model.eval(objective).as_decimal(15)
    print(f"Optimized distance: {l1_distance_1}")

    cost_optimized = l1_distance_1
    print(f"Optimized cost: {cost_optimized}")

    return original_f, x_prime_optimized, l1_distance_1, cost_optimized

W = w
B = b[0]
output_file = "/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1.csv"

# Solve step 1 for every predicted-positive record and append one CSV row per record.
# The file is opened in append mode, so running this cell again adds the rows a second time.
for i in range(len(filtered_records)):
    row = filtered_records.iloc[i]
    og_x_np = row.to_numpy()

    # Wall-clock time of the solver call only.
    start_time = time.time()
    original_f, x_prime_optimized, l1_distance_1, cost_optimized = step1(W, B, og_x_np)
    end_time = time.time()
    runtime = end_time - start_time

    data_row = [original_f, x_prime_optimized, l1_distance_1, cost_optimized, runtime]
    with open(output_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(data_row)

In [None]:
# Load the step-1 results. Column names are supplied via `names=` because the step-1 loop
# writes its rows with csv.writer and never writes a header line.
column_names = ['Original_x', 'Optimal_x', 'Cost', 'Objective', 'Runtime']
df1 = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1.csv', names = column_names)

def remove_question_mark(cell_value):
    """Delete every '?' from a string cell; hand any non-string value back untouched."""
    if not isinstance(cell_value, str):
        return cell_value
    return cell_value.replace('?', '')

def remove_question_c(cell_value):
    """Delete every '[' from a string cell; hand any non-string value back untouched."""
    if not isinstance(cell_value, str):
        return cell_value
    return cell_value.replace('[', '')

def remove_question_cc(cell_value):
    """Delete every ']' from a string cell; hand any non-string value back untouched."""
    if not isinstance(cell_value, str):
        return cell_value
    return cell_value.replace(']', '')

def convert_to_float(lst_str, verbose=False):
    """Parse a ", "-separated list of numbers (as text) into a float array.

    Each item may be wrapped in single quotes, and the whole text may still carry
    its surrounding square brackets; both are stripped before conversion.

    Args:
        lst_str: the text to parse; any other value is first converted with ``str``.
        verbose: when True, print the split items before converting them. This
            debugging output used to be printed unconditionally, once per cell,
            which flooded the notebook output.

    Returns:
        1-D ``numpy.ndarray`` of floats, one entry per item.

    Raises:
        ValueError: if an item cannot be converted by ``float``.
    """
    items = str(lst_str).split(", ")
    if verbose:
        print(items)
    return np.array([float(item.strip("[]' ")) for item in items])

# Z3's as_decimal() marks truncated decimals with a trailing '?'; strip it from every string cell.
cleaned_df = df1.applymap(remove_question_mark)

# Turn the textual list in 'Optimal_x' into a float array: drop the brackets, then parse.
# (No further bracket clean-up is needed afterwards: the cells are arrays, not strings.)
cleaned_df['Optimal_x'] = (
    cleaned_df['Optimal_x']
    .map(remove_question_c)
    .map(remove_question_cc)
    .apply(convert_to_float)
)

# Rows without any missing cell are the solved records; the rest go on to step 2.
optimized = cleaned_df.dropna()
left = cleaned_df[cleaned_df.isna().any(axis=1)]

optimized.to_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1_optimized.csv', index=False)
left.to_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1_failed.csv', index=False)

# Print the model's label for every counterfactual found in step 1.
for x in optimized['Optimal_x']:
    predicted_label = best.predict(x.reshape(1, -1))
    print(predicted_label)

## **Optimizer -- step 2**

In [None]:
def step2(W, B, row):
    """Find the smallest relaxation of the physical-activity range that admits a counterfactual.

    Uses the same constraints as step 1, except that the physical-activity bounds
    [60, 90] are widened by a non-negative slack variable (while the value itself
    must stay positive). The objective minimised is that slack.

    Args:
        W: 1-D weight vector; ``W.shape[0]`` is the number of features.
        B: scalar intercept.
        row: original feature values of the record, indexable by position.

    Returns:
        On success, the tuple ``(original_f, x_prime_optimized, l1_distance_1,
        cost_optimized, slack)``, where the last four are decimal strings from
        ``as_decimal(15)``. Despite its name, ``l1_distance_1`` holds the minimised
        slack (the objective), as does ``cost_optimized``.
        On failure, the list ``[original_f, '', '', '', '']``.
    """
    n = W.shape[0]

    # Create a Z3 optimization instance
    opt = Optimize()

    # Create Z3 real variable for X'
    X = [Real(f'x_{i}') for i in range(n)]

    # Gender: must keep its original value
    original_value = row[0]
    if original_value is not None:
        opt.add(X[0] == original_value)

    # Age: must keep its original value
    original_value1 = row[1]
    if original_value1 is not None:
        opt.add(X[1] == original_value1)

    # occupation
    opt.add(Or(X[2] == 1, X[2] == 2, X[2] == 3, X[2] == 4, X[2] == 5, X[2] == 6, X[2] == 7, X[2] == 8, X[2] == 9, X[2] == 10))

    # sleep
    constraint1 = And(X[3] >= 6, X[3] <= 7)
    opt.add(constraint1)

    # sleep quality
    constraint2 = Or(X[4] == 1, X[4] == 2, X[4] == 3, X[4] == 4, X[4] == 5, X[4] == 6, X[4] == 7, X[4] == 8, X[4] == 9, X[4] == 10)
    opt.add(constraint2)

    # physical activity: soft range, widened symmetrically by the slack
    slack_pha = Real('slack_pha')
    slack_pha_constraint = (slack_pha >= 0)
    constraint3 = And(X[5] > 0, X[5] >= 60 - slack_pha, X[5] <= 90 + slack_pha)
    opt.add(constraint3, slack_pha_constraint)

    # stress level may not increase.
    # The bound must come from this record (`row`). It was previously read from the
    # notebook-level `og_x_np`, i.e. whichever record the step-1 loop processed last
    # (or a NameError on a fresh kernel).
    current = row[6]
    constraint4 = (X[6] <= current)
    opt.add(constraint4)

    # bmi category
    constraint5 = Or(X[7] == 1, X[7] == 2, X[7] == 3)
    opt.add(constraint5)

    # blood pressure high
    constraint6 = And(X[8] >= 100, X[8] <= 140)
    opt.add(constraint6)

    # blood pressure low
    constraint7 = And(X[9] >= 70, X[9] <= 90)
    opt.add(constraint7)

    # heart rate
    constraint8 = And(X[10] >= 60, X[10] <= 80)
    opt.add(constraint8)

    # steps (hard range; no slack is applied to it)
    constraint9 = And(X[11] >= 3000, X[11] <= 5000)
    opt.add(constraint9)

    # The counterfactual must lie strictly on the negative side of the hyperplane.
    dot_product = Sum([W[i] * X[i] for i in range(n)])
    inequality = dot_product + B < 0
    opt.add(inequality)

    # Minimise how far the physical-activity range has to be widened.
    objective = slack_pha
    opt.minimize(objective)

    original_f = [row[i] for i in range(n)]

    # Check if there's a solution
    if opt.check() == sat:
        model = opt.model()

        # Get the optimized feature vector x_prime
        x_prime_optimized = [model[X[i]].as_decimal(15) for i in range(n)]
        print(f"Optimized x': {x_prime_optimized}")

        # Value of the objective (the slack), reported under both names
        l1_distance_1 = model.eval(objective).as_decimal(15)
        print(f"Optimized distance: {l1_distance_1}")

        cost_optimized = model.eval(objective).as_decimal(15)
        print(f"Optimized cost: {cost_optimized}")

        # Separate name so the Z3 variable is not rebound to a string
        slack_pha_value = model[slack_pha].as_decimal(15)
        print(f"Slack physicial activity value: {slack_pha_value}")

        return original_f, x_prime_optimized, l1_distance_1, cost_optimized, slack_pha_value

    else:
        print("````````````FAILED TO FIND AN OPTIMAL SOLUTION.`````````````````````")
        return [original_f, '', '', '', '']

W = w
B = b[0]
output_file1 = "/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv"
input_file = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1_failed.csv')

# 'Original_x' comes back from the CSV as bracketed text; strip the brackets and parse to float arrays.
input_file['Original_x'] = (
    input_file['Original_x']
    .map(remove_question_c)
    .map(remove_question_cc)
    .apply(convert_to_float)
)

# Run step 2 on every record step 1 could not solve and append one CSV row per record.
# The file is opened in append mode, so running this cell again adds the rows a second time.
for row in input_file['Original_x']:
    # Wall-clock time of the solver call only.
    start_time = time.time()
    original_f, x_prime_optimized, cost, cost_optimized, slack_pha = step2(W, B, row)
    end_time = time.time()
    runtime = end_time - start_time

    data_row = [original_f, x_prime_optimized, cost, cost_optimized, runtime, slack_pha]
    with open(output_file1, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(data_row)

['0.0', '27.0', '9.0', '6.1', '6.0', '42.0', '6.0', '3.0', '126.0', '83.0', '77.0', '4200.0']
['0.0', '28.0', '1.0', '6.2', '6.0', '60.0', '8.0', '0.0', '125.0', '80.0', '75.0', '10000.0']
['0.0', '28.0', '1.0', '6.2', '6.0', '60.0', '8.0', '0.0', '125.0', '80.0', '75.0', '10000.0']
['0.0', '28.0', '6.0', '5.9', '4.0', '30.0', '8.0', '2.0', '140.0', '90.0', '85.0', '3000.0']
['0.0', '28.0', '6.0', '5.9', '4.0', '30.0', '8.0', '2.0', '140.0', '90.0', '85.0', '3000.0']
['0.0', '28.0', '9.0', '5.9', '4.0', '30.0', '8.0', '2.0', '140.0', '90.0', '85.0', '3000.0']
['0.0', '29.0', '10.0', '6.3', '6.0', '40.0', '7.0', '2.0', '140.0', '90.0', '82.0', '3500.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0', '120.0', '80.0', '70.0', '8000.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0', '120.0', '80.0', '70.0', '8000.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0', '120.0', '80.0', '70.0', '8000.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0'

In [None]:
# The raw step-2 file has no header line, so the column names are supplied here.
column_names = ['Original_x', 'Optimal_x', 'Cost', 'Objective', 'Runtime', 'Slack_pha']
df_step2 = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv', names = column_names)

# Strip the '?' truncation marker from every string cell, then parse 'Optimal_x' into float arrays.
# (No further bracket clean-up is needed afterwards: the cells are arrays, not strings.)
df_step2 = df_step2.applymap(remove_question_mark)
df_step2['Optimal_x'] = (
    df_step2['Optimal_x']
    .map(remove_question_c)
    .map(remove_question_cc)
    .apply(convert_to_float)
)

# NOTE: this replaces the raw, header-less Step2.csv with a cleaned version that has a
# header line. Running this cell a second time would read that header as a data row.
df_step2.to_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv', index=False)

# Print the model's label for every step-2 counterfactual.
for x in df_step2['Optimal_x']:
    predicted_label = best.predict(x.reshape(1, -1))
    print(predicted_label)

## **Optimizer -- step 3**

In [None]:
def step3(W, B, row, slack_value2):
    """Find a counterfactual that changes as few features as possible (L0 objective).

    Same constraint set as the earlier steps, with the physical-activity range
    [60, 90] widened on both sides by the fixed amount ``slack_value2``. The
    objective minimised is the number of features whose value differs from ``row``.

    Args:
        W: 1-D weight vector; ``W.shape[0]`` is the number of features.
        B: scalar intercept.
        row: original feature values of the record, indexable by position.
        slack_value2: amount by which the physical-activity bounds are widened.

    Returns:
        On success, the tuple ``(original_f, x_prime_optimized, l1_distance_1,
        cost_optimized)``; the solution holds ``as_decimal(15)`` strings and the last
        two entries are both the evaluated objective (despite its name,
        ``l1_distance_1`` is the L0 count here).
        On failure, the list ``[original_f, '', '', '']``.
    """
    n = W.shape[0]
    opt = Optimize()
    X = [Real(f'x_{i}') for i in range(n)]

    def one_of(var, low, high):
        # var equals one of the integers low..high (inclusive).
        return Or(*[var == value for value in range(low, high + 1)])

    def within(var, low, high):
        # low <= var <= high
        return And(var >= low, var <= high)

    # Gender (0) and age (1) must keep their original values.
    for fixed_idx in (0, 1):
        fixed_value = row[fixed_idx]
        if fixed_value is not None:
            opt.add(X[fixed_idx] == fixed_value)

    opt.add(one_of(X[2], 1, 10))                                   # occupation
    opt.add(within(X[3], 6, 7))                                    # sleep
    opt.add(one_of(X[4], 1, 10))                                   # sleep quality
    opt.add(within(X[5], 60 - slack_value2, 90 + slack_value2))    # physical activity, relaxed
    opt.add(X[6] <= row[6])                                        # stress level may not increase
    opt.add(one_of(X[7], 1, 3))                                    # bmi category
    opt.add(within(X[8], 100, 140))                                # blood pressure high
    opt.add(within(X[9], 70, 90))                                  # blood pressure low
    opt.add(within(X[10], 60, 80))                                 # heart rate
    opt.add(within(X[11], 3000, 5000))                             # feature 11 (labelled "steps" in step1)

    # The counterfactual must lie strictly on the negative side of the hyperplane.
    score = Sum([W[i] * X[i] for i in range(n)])
    opt.add(score + B < 0)

    # L0 objective: one unit for every feature that differs from the original.
    objective = Sum([If(row[i] != X[i], 1, 0) for i in range(n)])
    opt.minimize(objective)

    original_f = [row[i] for i in range(n)]

    if opt.check() != sat:
        print("````````````FAILED TO FIND AN OPTIMAL SOLUTION.`````````````````````")
        return [original_f, '', '', '']

    model = opt.model()

    x_prime_optimized = [model[var].as_decimal(15) for var in X]
    print(f"Optimized x': {x_prime_optimized}")

    # The objective value is reported twice, under both names.
    l1_distance_1 = model.eval(objective)
    print(f"Distance: {l1_distance_1}")

    cost_optimized = model.eval(objective)
    print(f"Objective: {cost_optimized}")

    return original_f, x_prime_optimized, l1_distance_1, cost_optimized

W = w
B = b[0]
out = '/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l0/Step3.csv'
# Reads the cleaned Step2.csv (with header line) written by the step-2 post-processing cell.
last = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv')

# 'Original_x' comes back from the CSV as bracketed text; strip the brackets and parse to float arrays.
last['Original_x'] = (
    last['Original_x']
    .map(remove_question_c)
    .map(remove_question_cc)
    .apply(convert_to_float)
)

# Run step 3 for every step-2 record, reusing the slack found there, and append one CSV row each.
# The file is opened in append mode, so running this cell again adds the rows a second time.
for i, row in enumerate(last['Original_x']):
    slack_value2 = last['Slack_pha'].iloc[i]

    # Wall-clock time of the solver call only.
    start_time = time.time()
    original_f, x_prime_optimized, l1_distance_1, cost_optimized = step3(W, B, row, slack_value2)
    end_time = time.time()
    runtime = end_time - start_time

    data_row = [original_f, x_prime_optimized, l1_distance_1, cost_optimized, runtime]
    with open(out, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(data_row)

['0.0', '27.0', '9.0', '6.1', '6.0', '42.0', '6.0', '3.0', '126.0', '83.0', '77.0', '4200.0']
['0.0', '28.0', '1.0', '6.2', '6.0', '60.0', '8.0', '0.0', '125.0', '80.0', '75.0', '10000.0']
['0.0', '28.0', '1.0', '6.2', '6.0', '60.0', '8.0', '0.0', '125.0', '80.0', '75.0', '10000.0']
['0.0', '28.0', '6.0', '5.9', '4.0', '30.0', '8.0', '2.0', '140.0', '90.0', '85.0', '3000.0']
['0.0', '28.0', '6.0', '5.9', '4.0', '30.0', '8.0', '2.0', '140.0', '90.0', '85.0', '3000.0']
['0.0', '28.0', '9.0', '5.9', '4.0', '30.0', '8.0', '2.0', '140.0', '90.0', '85.0', '3000.0']
['0.0', '29.0', '10.0', '6.3', '6.0', '40.0', '7.0', '2.0', '140.0', '90.0', '82.0', '3500.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0', '120.0', '80.0', '70.0', '8000.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0', '120.0', '80.0', '70.0', '8000.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0', '120.0', '80.0', '70.0', '8000.0']
['0.0', '29.0', '1.0', '7.8', '7.0', '75.0', '6.0', '0.0'

In [None]:
# The raw step-3 file has no header line, so the column names are supplied here.
column_names = ['Original_x', 'Optimal_x', 'Cost', 'Objective', 'Runtime']
df_step3 = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l0/Step3.csv', names = column_names)

# Strip the '?' truncation marker from every string cell, then parse 'Optimal_x' into float arrays.
# (No further bracket clean-up is needed afterwards: the cells are arrays, not strings.)
df_step3 = df_step3.applymap(remove_question_mark)
df_step3['Optimal_x'] = (
    df_step3['Optimal_x']
    .map(remove_question_c)
    .map(remove_question_cc)
    .apply(convert_to_float)
)

# NOTE: this replaces the raw, header-less Step3.csv with a cleaned version that has a
# header line. Running this cell a second time would read that header as a data row.
df_step3.to_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l0/Step3.csv', index=False)

# Print the model's label for every step-3 counterfactual.
for x in df_step3['Optimal_x']:
    predicted_label = best.predict(x.reshape(1, -1))
    print(predicted_label)

["'0'", "'27'", "'1'", "'6.1'", "'10'", "'35.71633437694371'", "'6'", "'1'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'28'", "'1'", "'6.2'", "'10'", "'35.547216807847065'", "'8'", "'1'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'28'", "'1'", "'6.2'", "'10'", "'35.547216807847065'", "'8'", "'1'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'28'", "'1'", "'7'", "'10'", "'35.547216807847065'", "'8'", "'2'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'28'", "'1'", "'7'", "'10'", "'35.547216807847065'", "'8'", "'2'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'28'", "'1'", "'7'", "'10'", "'35.547216807847065'", "'8'", "'2'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'29'", "'1'", "'6.3'", "'10'", "'35.37809923875041'", "'7'", "'2'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'29'", "'1'", "'7'", "'10'", "'35.37809923875041'", "'6'", "'2'", "'140'", "'70'", "'80'", "'5000'"]
["'0'", "'29'", "'1'", "'7'", "'10'", "'35.37809923875041'", "'6'", "'1'", "'140'", "'70'", "'80'", "'5000'

## **Average runtimes for l0 & l1**

In [None]:
# Mean solver runtime over the step-3 (L0) results.
final_l0 = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l0/Step3.csv')
l0_runtime = final_l0['Runtime'].mean()

print('L0 runtime', l0_runtime)

In [None]:
# Step-1 result files: solved records and records that needed relaxation.
d1 = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1_optimized.csv')
d3_initial = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step1_failed.csv')
# Step-2 results are loaded as well, but do not enter the average computed below.
d3_prior = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv')

# Mean step-1 runtime over all records, solved or not.
d = pd.concat([d1, d3_initial])
l1_runtime = d['Runtime'].mean()
print("L1 runtime", l1_runtime)

## **Percentage of relaxations**

In [None]:
# Share (in percent) of the predicted-positive records that went through step 2.
relaxed_df = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv')

needed_relaxing = len(relaxed_df)
subset_of_experiment = len(filtered_records)

percentage_of_relaxations = needed_relaxing / subset_of_experiment * 100
print(percentage_of_relaxations)

### **Average percentage of relaxed soft constraints**

In [None]:
relaxed_df = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Research/CFE/Public/ImplementationResults/Health_lifestyle_l1/Step2.csv')
# Only one soft constraint (physical activity) carries a slack variable.
number_of_slacks_per_record = 1

count_s1 = 0
total_count = 0
total_sum = 0
# For each record, add the fraction of its slack variables that ended up non-zero.
for slack in relaxed_df['Slack_pha']:
    count_s1 = 0 if slack == 0 else 1
    total_count = count_s1
    total_sum += total_count / number_of_slacks_per_record

# Average of those per-record fractions, as a percentage.
average_per_dataset = total_sum / len(relaxed_df) * 100

print(average_per_dataset)

100.0
