In [16]:
import shap
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesRegressor
from lime import lime_tabular
from unravel import core
from sklearn.metrics import accuracy_score, r2_score


In [17]:
import lime
import lime.lime_tabular
from scipy.stats import norm
import random

In [18]:
import pandas as pd
import numpy as np
import joblib
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from lime.lime_tabular import LimeTabularExplainer
import GPyOpt
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, r2_score
from sklearn.metrics import jaccard_score
from tqdm import tqdm
import os
import itertools
from sklearn.linear_model import LinearRegression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [20]:
import joblib

In [21]:
# Function to preprocess data
def preprocess_data(df, target_column, categorical_columns=None, n_train=None):
    """Clean, encode, split and standardize a tabular dataset.

    Steps, in order: drop rows whose target is NaN, one-hot encode the
    requested categorical columns (first level of each dropped), split into
    train/test with a fixed seed, and standardize the features with a
    scaler fitted on the training split only.

    :param df: DataFrame holding the features and the target column. It is
        not modified; every step below works on a new frame.
    :param target_column: name of the column to predict.
    :param categorical_columns: iterable of column names to one-hot encode;
        ``None`` or empty means no encoding.
    :param n_train: forwarded to ``train_test_split`` as ``train_size`` when
        truthy; otherwise a 20% test split is used.
    :return: ``(X_train, X_test, y_train, y_test, n_total, p, feature_names)``.
        The X values are the scaler's outputs, the y values are slices of the
        target column that keep the row labels of ``df``, ``n_total`` is the
        row count after cleaning, ``p`` the number of feature columns after
        encoding and ``feature_names`` their names in column order.
    """
    # A None default avoids sharing one mutable list between calls.
    categorical_columns = list(categorical_columns) if categorical_columns else []

    # Remove rows where target contains NaN values
    df = df.dropna(subset=[target_column])

    if categorical_columns:
        # Cast to str first so mixed-type or missing entries become ordinary
        # categories. astype returns a new frame, so nothing is assigned into
        # the dropna() result, and one get_dummies call encodes every column
        # (the dummy columns end up in the same order as the per-column loop).
        df = df.astype({col: str for col in categorical_columns})
        df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

    # Split the dataset into features (X) and target (y)
    X = df.drop(target_column, axis=1)
    y = df[target_column]

    # Train-test split (seeded so repeated runs produce the same partition)
    if n_train:
        X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=n_train, random_state=42)
    else:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features; the test split reuses the training statistics
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Feature names after one-hot encoding, in column order
    feature_names = X.columns.tolist()

    return X_train, X_test, y_train, y_test, len(X), len(X.columns), feature_names


1st dataset- Parkinson's Dataset

In [22]:
# Parkinson's: 'status' is the target; the 'name' column is dropped before modelling.
target_column = 'status'
categorical_columns = []
parkinsons_df = pd.read_csv('/home/aditi23010/BML_PROJECT/data/parkinsons.data').drop(columns='name')
(
    X_train_parkinsons,
    X_test_parkinsons,
    y_train_parkinsons,
    y_test_parkinsons,
    n_total_parkinsons,
    p_parkinsons,
    feature_names_parkinsons,
) = preprocess_data(parkinsons_df, target_column, categorical_columns, n_train=175)


In [23]:
# Combine preprocessed data into a DataFrame.
# The targets are pandas Series that still carry the shuffled row labels of the
# source frame, while the scaled feature arrays get a fresh 0..n-1 index.
# Assigning the Series directly would align on those labels and scramble (or
# NaN-out) the targets, so attach the raw values positionally instead.
parkinsons_train = pd.DataFrame(X_train_parkinsons)
parkinsons_train['status'] = np.asarray(y_train_parkinsons)
parkinsons_test = pd.DataFrame(X_test_parkinsons)
parkinsons_test['status'] = np.asarray(y_test_parkinsons)

# Save preprocessed Parkinson's data as CSV
parkinsons_train.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/parkinsons_train.csv', index=False)
parkinsons_test.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/parkinsons_test.csv', index=False)


2nd Dataset- Breast cancer dataset

In [24]:
# Breast cancer: drop the two non-feature columns and encode the diagnosis
# as 1 (malignant, 'M') / 0 (benign, 'B').
target_column = 'diagnosis'
cancer_df = pd.read_csv('/home/aditi23010/BML_PROJECT/data/cancer.csv').drop(columns=['id', 'Unnamed: 32'])
cancer_df['diagnosis'] = cancer_df['diagnosis'].map({'M': 1, 'B': 0})
(
    X_train_cancer,
    X_test_cancer,
    y_train_cancer,
    y_test_cancer,
    n_total_cancer,
    p_cancer,
    feature_names_cancer,
) = preprocess_data(cancer_df, target_column, [], n_train=512)


In [25]:
# Combine preprocessed data into a DataFrame.
# The target Series keep the shuffled row labels of the source frame, whereas
# the feature frames are indexed 0..n-1; assign the underlying values so the
# labels are attached by position rather than re-aligned by index.
cancer_train = pd.DataFrame(X_train_cancer)
cancer_train['diagnosis'] = np.asarray(y_train_cancer)
cancer_test = pd.DataFrame(X_test_cancer)
cancer_test['diagnosis'] = np.asarray(y_test_cancer)

# Save preprocessed Cancer data as CSV
cancer_train.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/cancer_train.csv', index=False)
cancer_test.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/cancer_test.csv', index=False)


3rd Dataset - Adult Income Dataset

In [26]:

# Adult income: '?' marks a missing value, and any row containing one is dropped.
target_column = 'income'
categorical_columns = ['workclass', 'education', 'marital.status', 'occupation', 'relationship', 'race', 'sex', 'native.country']
adult_df = (
    pd.read_csv('/home/aditi23010/BML_PROJECT/data/adult.csv')
    .replace('?', np.nan)
    .dropna()
)
# Binary target: 0 for '<=50K', 1 for '>50K'.
adult_df[target_column] = adult_df[target_column].map({'<=50K': 0, '>50K': 1})


In [27]:
# No n_train is given, so preprocess_data falls back to its 20% test split.
(
    X_train_adult,
    X_test_adult,
    y_train_adult,
    y_test_adult,
    n_total_adult,
    p_adult,
    feature_names_adult,
) = preprocess_data(adult_df, target_column, categorical_columns)

In [28]:
# Combine preprocessed data into a DataFrame.
# The target Series keep the (shuffled, gappy after dropna) row labels of the
# source frame, whereas the feature frames are indexed 0..n-1; assign the
# underlying values so the labels are attached by position instead of being
# re-aligned by index.
adult_train = pd.DataFrame(X_train_adult)
adult_train['income'] = np.asarray(y_train_adult)
adult_test = pd.DataFrame(X_test_adult)
adult_test['income'] = np.asarray(y_test_adult)

# Save preprocessed Adult data as CSV
adult_train.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/adult_train.csv', index=False)
adult_test.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/adult_test.csv', index=False)


4th Dataset- Boston dataset

In [29]:
# Boston housing from OpenML; MEDV is the house price (target).
boston = fetch_openml(name="boston", version=1, as_frame=True)
X = boston.data
y = boston.target
target_column = 'MEDV'
categorical_columns = []
# assign() works on a copy, so the fetched feature frame X stays untouched.
boston_df = X.assign(MEDV=y)
(
    X_train_boston,
    X_test_boston,
    y_train_boston,
    y_test_boston,
    n_total_boston,
    p_boston,
    feature_names_boston,
) = preprocess_data(boston_df, target_column, categorical_columns, n_train=455)

In [30]:
# Combine preprocessed data into a DataFrame.
# The target Series keep the shuffled row labels of the source frame, whereas
# the feature frames are indexed 0..n-1; assign the underlying values so the
# prices are attached by position rather than re-aligned by index.
boston_train = pd.DataFrame(X_train_boston)
boston_train['MEDV'] = np.asarray(y_train_boston)
boston_test = pd.DataFrame(X_test_boston)
boston_test['MEDV'] = np.asarray(y_test_boston)

# Save preprocessed Boston data as CSV
boston_train.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/boston_train.csv', index=False)
boston_test.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/boston_test.csv', index=False)


5th Dataset- Body fat Dataset

In [31]:
# Body fat: every column other than 'BodyFat' is used as a feature.
target_column = 'BodyFat'
bodyfat_df = pd.read_csv('/home/aditi23010/BML_PROJECT/data/bodyfat.csv')
(
    X_train_bodyfat,
    X_test_bodyfat,
    y_train_bodyfat,
    y_test_bodyfat,
    n_total_bodyfat,
    p_bodyfat,
    feature_names_bodyfat,
) = preprocess_data(bodyfat_df, target_column, [], n_train=226)

In [32]:
# Combine preprocessed data into a DataFrame.
# The target Series keep the shuffled row labels of the source frame, whereas
# the feature frames are indexed 0..n-1; assign the underlying values so the
# targets are attached by position rather than re-aligned by index.
bodyfat_train = pd.DataFrame(X_train_bodyfat)
bodyfat_train['BodyFat'] = np.asarray(y_train_bodyfat)
bodyfat_test = pd.DataFrame(X_test_bodyfat)
bodyfat_test['BodyFat'] = np.asarray(y_test_bodyfat)

# Save preprocessed Body Fat data as CSV
bodyfat_train.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/bodyfat_train.csv', index=False)
bodyfat_test.to_csv('/home/aditi23010/BML_PROJECT/preprocessed_data/bodyfat_test.csv', index=False)

In [33]:
# Confirmation message printed after the export cells above have run.
print("Preprocessed datasets saved successfully.")

Preprocessed datasets saved successfully.


Model training and evaluation

In [34]:
# Train and evaluate classification models (SVM)
def train_svm_classifier(X_train, X_test, y_train, y_test, model_name):
    """
    Train a linear SVM, persist it with joblib and score it on the test split.

    :param X_train: training features
    :param X_test: held-out features
    :param y_train: training labels
    :param y_test: held-out labels
    :param model_name: prefix of the saved file, ``{model_name}_svm_model.pkl``
    :return: accuracy of the model on ``(X_test, y_test)``
    """
    # probability=True so the fitted model also exposes predict_proba (LIME compatibility)
    model = SVC(kernel='linear', random_state=42, probability=True)
    model.fit(X_train, y_train)

    # Save the trained model; create the target directory first, because
    # joblib.dump does not create missing parent directories.
    model_path = f'/home/aditi23010/BML_PROJECT/models/{model_name}_svm_model.pkl'
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)

    # Predict on test data and evaluate accuracy
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)


# Train and evaluate regression models (Extra Trees Regressor)
def train_extra_trees_regressor(X_train, X_test, y_train, y_test, model_name):
    """
    Train an Extra Trees regressor, persist it with joblib and score it on the test split.

    :param X_train: training features
    :param X_test: held-out features
    :param y_train: training targets
    :param y_test: held-out targets
    :param model_name: prefix of the saved file, ``{model_name}_extra_trees_model.pkl``
    :return: R² score of the model on ``(X_test, y_test)``
    """
    # Fixed seed so the fitted ensemble is the same on every run
    model = ExtraTreesRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Save the trained model; create the target directory first, because
    # joblib.dump does not create missing parent directories.
    model_path = f'/home/aditi23010/BML_PROJECT/models/{model_name}_extra_trees_model.pkl'
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)

    # Predict on test data and evaluate the R² score
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred)

# Print table with dataset information
def print_dataset_info(name, task_type, p, n_train, n_total, score):
    """
    Print one space-separated summary row for a dataset.

    :param name: Name of the dataset (left-justified to 12 characters)
    :param task_type: 'C' for classification, 'R' for regression
    :param p: Number of features (left-justified to 4 characters)
    :param n_train: Number of training samples (left-justified to 6 characters)
    :param n_total: Total number of samples (left-justified to 7 characters)
    :param score: Accuracy for classification or R² score for regression,
        printed with two decimals
    """
    cells = [
        name.ljust(12),
        str(task_type),
        str(p).ljust(4),
        str(n_train).ljust(6),
        str(n_total).ljust(7),
        f"{score:.2f}",
    ]
    print(" ".join(cells))

# Column header for the rows emitted by print_dataset_info; it is printed once
# here, when the cell that defines the helpers runs.
print("Dataset      Task p    n_train n_total R²/Accuracy")

Dataset      Task p    n_train n_total R²/Accuracy


In [35]:
# Create the model output directories if they do not exist yet.
# The training helpers write to the absolute project path, which is not
# necessarily the same directory as the relative 'models' folder, so make sure
# both are present. exist_ok=True makes the call a no-op when they already exist.
for models_dir in ('models', '/home/aditi23010/BML_PROJECT/models'):
    os.makedirs(models_dir, exist_ok=True)

In [36]:
# Fit one model per dataset and keep its held-out score:
# linear SVMs for the three classification sets, Extra Trees for the two regression sets.
accuracy_parkinsons, accuracy_cancer, accuracy_adult = [
    train_svm_classifier(*split, label)
    for split, label in (
        ((X_train_parkinsons, X_test_parkinsons, y_train_parkinsons, y_test_parkinsons), "parkinsons"),
        ((X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer), "cancer"),
        ((X_train_adult, X_test_adult, y_train_adult, y_test_adult), "adult"),
    )
]
r2_boston, r2_bodyfat = [
    train_extra_trees_regressor(*split, label)
    for split, label in (
        ((X_train_boston, X_test_boston, y_train_boston, y_test_boston), "boston"),
        ((X_train_bodyfat, X_test_bodyfat, y_train_bodyfat, y_test_bodyfat), "bodyfat"),
    )
]


In [37]:
# Summary table: one row per dataset with its held-out score.
print("Dataset   Task p    n_train n_total R²/Accuracy")
# n_train is read from the first dimension of each training array.
summary_rows = [
    ("Parkinson's", 'C', p_parkinsons, X_train_parkinsons.shape[0], n_total_parkinsons, accuracy_parkinsons),
    ("Cancer", 'C', p_cancer, X_train_cancer.shape[0], n_total_cancer, accuracy_cancer),
    ("Adult", 'C', p_adult, X_train_adult.shape[0], n_total_adult, accuracy_adult),
    ("Boston", 'R', p_boston, X_train_boston.shape[0], n_total_boston, r2_boston),
    ("BodyFat", 'R', p_bodyfat, X_train_bodyfat.shape[0], n_total_bodyfat, r2_bodyfat),
]
for summary_row in summary_rows:
    print_dataset_info(*summary_row)


Dataset   Task p    n_train n_total R²/Accuracy
Parkinson's  C 22   175    195     0.85
Cancer       C 30   512    569     0.96
Adult        C 96   21586  26983   0.85
Boston       R 13   455    506     0.92
BodyFat      R 14   226    252     1.00


In [38]:
from sklearn.model_selection import cross_val_score

def cross_validate_extra_trees(X, y):
    """
    Score a seeded 100-tree Extra Trees regressor with 5-fold cross-validation.

    :param X: feature matrix
    :param y: regression targets
    :return: mean R² score across the folds
    """
    regressor = ExtraTreesRegressor(n_estimators=100, random_state=42)
    fold_scores = cross_val_score(regressor, X, y, cv=5, scoring='r2')
    return np.mean(fold_scores)

# Cross-validation for Body Fat Dataset: 5-fold CV on the training split only,
# as a second estimate next to the single held-out R² reported above.
# NOTE(review): both scores are close to 1 — presumably one feature almost
# determines BodyFat; confirm there is no target leakage in the columns.
cross_val_score_bodyfat = cross_validate_extra_trees(X_train_bodyfat, y_train_bodyfat)
print(f"Cross-validation R² score for Body Fat: {cross_val_score_bodyfat:.2f}")


Cross-validation R² score for Body Fat: 0.98


In [39]:
# Same summary table as before, re-printed after the cross-validation check.
print("Dataset   Task p    n_train n_total R²/Accuracy")
for table_row in (
    ("Parkinson's", 'C', p_parkinsons, X_train_parkinsons.shape[0], n_total_parkinsons, accuracy_parkinsons),
    ("Cancer", 'C', p_cancer, X_train_cancer.shape[0], n_total_cancer, accuracy_cancer),
    ("Adult", 'C', p_adult, X_train_adult.shape[0], n_total_adult, accuracy_adult),
    ("Boston", 'R', p_boston, X_train_boston.shape[0], n_total_boston, r2_boston),
    ("BodyFat", 'R', p_bodyfat, X_train_bodyfat.shape[0], n_total_bodyfat, r2_bodyfat),
):
    print_dataset_info(*table_row)


Dataset   Task p    n_train n_total R²/Accuracy
Parkinson's  C 22   175    195     0.85
Cancer       C 30   512    569     0.96
Adult        C 96   21586  26983   0.85
Boston       R 13   455    506     0.92
BodyFat      R 14   226    252     1.00


In [40]:
# Function to compute Jaccard Index
def compute_jaccard_index(set_a, set_b):
    """Return the Jaccard distance ``1 - |A ∩ B| / |A ∪ B|`` of two sets.

    Despite the name this is the distance, not the similarity: 0 means the
    sets are identical and 1 means they are disjoint. Two empty sets give 0.
    """
    combined = set_a.union(set_b)
    if not combined:
        return 0
    shared = set_a.intersection(set_b)
    return 1 - (len(shared) / len(combined))

LIME explanation

In [41]:
def generate_lime_explanations(X_train, y_train, X_test, model, feature_names, num_samples=100, num_iterations=50, random_state=None):
    """Estimate how much LIME's top-5 feature sets vary across random test samples.

    In each iteration ``num_samples`` distinct test rows are drawn, every row
    is explained with LIME in regression mode against ``model.predict``, and
    the names of the 5 reported features are stored as a set. Every
    explanation of one iteration is then compared with every explanation of
    each other iteration using ``compute_jaccard_index`` (0 = identical
    sets, 1 = disjoint), and the mean of those values is returned. The
    compared explanations therefore generally belong to different test rows.

    :param X_train: training features (ndarray or DataFrame) used to fit the explainer
    :param y_train: unused; kept so existing callers keep working
    :param X_test: test features; rows are selected by position
    :param model: fitted estimator whose ``predict`` is explained. The explainer
        is always built with ``mode='regression'``, also for classifiers.
    :param feature_names: column names matching ``X_train``
    :param num_samples: rows explained per iteration (capped at ``len(X_test)``)
    :param num_iterations: number of sampling rounds
    :param random_state: ``None`` (default) keeps using NumPy's global random
        state; an int or ``RandomState`` makes the row sampling and the LIME
        perturbations reproducible
    :return: mean pairwise Jaccard distance, or NaN when there are no pairs to
        compare (fewer than two iterations or no test rows)
    """
    from sklearn.utils import check_random_state

    # check_random_state(None) hands back NumPy's global RandomState, so the
    # default call draws from the same stream as plain np.random.choice.
    rng = check_random_state(random_state)

    # Ensure X_train is a DataFrame with the correct feature names
    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train, columns=feature_names)
    # Rows are picked with X_test[i], which needs positional (array) indexing
    if isinstance(X_test, pd.DataFrame):
        X_test = X_test.to_numpy()

    # Create LIME explainer
    explainer = LimeTabularExplainer(
        X_train.values, feature_names=feature_names, mode='regression', random_state=rng
    )

    # Ensure that num_samples does not exceed the available samples
    num_samples = min(num_samples, X_test.shape[0])

    # One list of feature-name sets per iteration
    explanation_sets = []
    for _ in tqdm(range(num_iterations)):
        sampled_rows = rng.choice(X_test.shape[0], num_samples, replace=False)
        explanations = []
        for row in sampled_rows:
            exp = explainer.explain_instance(X_test[row], model.predict, num_features=5)
            explanations.append({item[0] for item in exp.as_list()})
        explanation_sets.append(explanations)

    # Jaccard distances between all explanations of every pair of iterations.
    # The local name deliberately avoids `jaccard_score`, which the notebook
    # imports from sklearn.metrics.
    distances = [
        compute_jaccard_index(features_a, features_b)
        for run_a, run_b in itertools.combinations(explanation_sets, 2)
        for features_a, features_b in itertools.product(run_a, run_b)
    ]

    if not distances:
        # np.mean([]) would emit a RuntimeWarning before returning NaN
        return np.nan
    return np.mean(distances)


In [42]:
# Function to compute Jaccard index
def compute_jaccard_index(set1, set2):
    """Return the Jaccard distance ``1 - |A ∩ B| / |A ∪ B|`` of two sets.

    Despite the name this is the distance, not the similarity: 0 means the
    sets are identical and 1 means they are disjoint. Two empty sets are
    identical, so they yield 0.0 instead of raising ZeroDivisionError.
    """
    union = len(set1.union(set2))
    if union == 0:
        return 0.0
    intersection = len(set1.intersection(set2))
    return 1 - (intersection / union)

In [43]:
# Step 1: Fetch the Boston dataset again and rebuild the combined frame
# (features plus the MEDV target column).
boston = fetch_openml(name="boston", version=1, as_frame=True)
X = boston.data
y = boston.target
boston_df = X.assign(MEDV=y)

In [44]:
# The Boston dataset is passed without any categorical columns to encode.
target_column = 'MEDV'
categorical_columns = []
(
    X_train_boston,
    X_test_boston,
    y_train_boston,
    y_test_boston,
    n_total_boston,
    p_boston,
    feature_names_boston,
) = preprocess_data(boston_df, target_column, categorical_columns, n_train=455)


In [45]:
# Fit an Extra Trees regressor with default settings on the Boston training split.
# No random_state is set here, so the fitted trees can differ between runs.
# NOTE(review): `model` is rebound inside the later LIME loop, so this fit
# looks unused — confirm before relying on it.
model = ExtraTreesRegressor()
model.fit(X_train_boston, y_train_boston)


In [46]:
def compute_jaccard_index(set1, set2):
    """Return the Jaccard distance ``1 - |A ∩ B| / |A ∪ B|`` of two sets.

    Despite the name this is the distance, not the similarity: 0 means the
    sets are identical and 1 means they are disjoint. Two empty sets are
    identical, so their distance is 0.0 (a distance of 1 would claim they
    are maximally different).
    """
    union = len(set1.union(set2))
    if union == 0:
        return 0.0
    intersection = len(set1.intersection(set2))
    return 1 - (intersection / union)

In [47]:
# Display name -> (training features, training targets, test features),
# in the order the datasets are processed.
datasets = {}
datasets["Parkinson's"] = (X_train_parkinsons, y_train_parkinsons, X_test_parkinsons)
datasets["Breast Cancer"] = (X_train_cancer, y_train_cancer, X_test_cancer)
datasets["Adult Income"] = (X_train_adult, y_train_adult, X_test_adult)
datasets["Boston"] = (X_train_boston, y_train_boston, X_test_boston)
datasets["Body Fat"] = (X_train_bodyfat, y_train_bodyfat, X_test_bodyfat)

In [48]:
results_LIME = {}

# Lookup tables replace the per-dataset branching inside the loop.
classification_datasets = {"Parkinson's", "Breast Cancer", "Adult Income"}
lime_feature_names = {
    "Parkinson's": feature_names_parkinsons,
    "Breast Cancer": feature_names_cancer,
    "Adult Income": feature_names_adult,
    "Boston": feature_names_boston,
}

# Train one model per dataset and compute its LIME stability score
for name, (X_train, y_train, X_test) in tqdm(datasets.items()):
    # Linear SVM for the classification tasks, Extra Trees for the regression tasks
    model = (
        SVC(kernel='linear', random_state=42, probability=True)
        if name in classification_datasets
        else ExtraTreesRegressor(n_estimators=100, random_state=42)
    )
    model.fit(X_train, y_train)

    # Any name missing from the table falls back to the body-fat feature names
    stability_score = generate_lime_explanations(
        X_train, y_train, X_test, model, lime_feature_names.get(name, feature_names_bodyfat)
    )

    results_LIME[name] = stability_score

100%|██████████| 50/50 [11:53<00:00, 14.28s/it]
100%|██████████| 50/50 [50:35<00:00, 60.71s/it]
100%|██████████| 50/50 [7:39:41<00:00, 551.64s/it]]
100%|██████████| 50/50 [18:54<00:00, 22.68s/it]s/it]
100%|██████████| 50/50 [10:15<00:00, 12.30s/it]/it] 
100%|██████████| 5/5 [9:15:13<00:00, 6662.68s/it]  


In [49]:
# Report the LIME stability score of every dataset with four decimals.
for dataset in results_LIME:
    score = results_LIME[dataset]
    print(f"Stability Score for LIME on {dataset} dataset: {score:.4f}")

Stability Score for LIME on Parkinson's dataset: 0.8515
Stability Score for LIME on Breast Cancer dataset: 0.8768
Stability Score for LIME on Adult Income dataset: 0.4683
Stability Score for LIME on Boston dataset: 0.7971
Stability Score for LIME on Body Fat dataset: 0.9054


In [50]:
# Wrapping the score dict in a list yields a single-row frame with one column per dataset
results_df_LIME = pd.DataFrame([results_LIME])  # Convert dict to DataFrame

# Save the DataFrame to CSV (index=False keeps the row label out of the file)
results_df_LIME.to_csv('/home/aditi23010/BML_PROJECT/stability_results_LIME.csv', index=False)

print("Results saved to /home/aditi23010/BML_PROJECT/stability_results_LIME.csv")

Results saved to /home/aditi23010/BML_PROJECT/stability_results_LIME.csv


In [51]:
# Display the one-row LIME stability table as the cell output.
results_df_LIME

Unnamed: 0,Parkinson's,Breast Cancer,Adult Income,Boston,Body Fat
0,0.85153,0.876756,0.468294,0.797084,0.90536


BayLIME explanation

In [52]:
# Helper function for generating explanations using BayLIME
def generate_baylime_explanations(X_train, y_train, X_test, model, feature_names, num_samples=100, num_iterations=50, random_state=None):
    """Estimate how much top-5 feature sets vary when LIME uses a Bayesian ridge surrogate.

    In each iteration ``num_samples`` distinct test rows are drawn, every row
    is explained with LIME in regression mode against ``model.predict`` using
    ``BayesianRidge`` as the local surrogate, and the names of the 5 reported
    features are stored as a set. Every explanation of one iteration is then
    compared with every explanation of each other iteration using
    ``compute_jaccard_index`` (0 = identical sets, 1 = disjoint), and the
    mean of those values is returned. The compared explanations therefore
    generally belong to different test rows.

    :param X_train: training features (ndarray or DataFrame) used to fit the explainer
    :param y_train: unused; kept so existing callers keep working
    :param X_test: test features; rows are selected by position
    :param model: fitted estimator whose ``predict`` is explained. The explainer
        is always built with ``mode='regression'``, also for classifiers.
    :param feature_names: column names matching ``X_train``
    :param num_samples: rows explained per iteration (capped at ``len(X_test)``)
    :param num_iterations: number of sampling rounds
    :param random_state: ``None`` (default) keeps using NumPy's global random
        state; an int or ``RandomState`` makes the row sampling and the LIME
        perturbations reproducible
    :return: mean pairwise Jaccard distance, or NaN when there are no pairs to
        compare (fewer than two iterations or no test rows)
    """
    from sklearn.linear_model import BayesianRidge
    from sklearn.utils import check_random_state

    # check_random_state(None) hands back NumPy's global RandomState, so the
    # default call draws from the same stream as plain np.random.choice.
    rng = check_random_state(random_state)

    # Ensure X_train is a DataFrame with the correct feature names
    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train, columns=feature_names)
    # Rows are picked with X_test[i], which needs positional (array) indexing
    if isinstance(X_test, pd.DataFrame):
        X_test = X_test.to_numpy()

    # Create LIME explainer
    explainer = LimeTabularExplainer(
        X_train.values, feature_names=feature_names, mode='regression', random_state=rng
    )

    # Bayesian surrogate that is actually handed to LIME. Without it
    # explain_instance falls back to its default regressor and this function
    # is indistinguishable from plain LIME.
    # NOTE(review): LIME's feature-selection step may still choose the feature
    # set independently of model_regressor — confirm against the lime version in use.
    surrogate = BayesianRidge()

    # Ensure that num_samples does not exceed the available samples
    num_samples = min(num_samples, X_test.shape[0])

    # One list of feature-name sets per iteration
    explanation_sets = []
    for _ in tqdm(range(num_iterations)):
        sampled_rows = rng.choice(X_test.shape[0], num_samples, replace=False)
        explanations = []
        for row in sampled_rows:
            exp = explainer.explain_instance(
                X_test[row], model.predict, num_features=5, model_regressor=surrogate
            )
            explanations.append({item[0] for item in exp.as_list()})
        explanation_sets.append(explanations)

    # Jaccard distances between all explanations of every pair of iterations.
    # The local name deliberately avoids `jaccard_score`, which the notebook
    # imports from sklearn.metrics.
    distances = []
    for i in tqdm(range(num_iterations)):
        for j in range(i + 1, num_iterations):
            distances.extend(
                compute_jaccard_index(features_i, features_j)
                for features_i, features_j in itertools.product(explanation_sets[i], explanation_sets[j])
            )

    if not distances:
        # np.mean([]) would emit a RuntimeWarning before returning NaN
        return np.nan
    return np.mean(distances)


In [53]:
# Helper function to compute Jaccard index
def compute_jaccard_index(set1, set2):
    """Return the Jaccard distance ``1 - |A ∩ B| / |A ∪ B|`` of two sets.

    Despite the name this is the distance, not the similarity: 0 means the
    sets are identical and 1 means they are disjoint. Two empty sets are
    identical, so their distance is 0.0 (a distance of 1 would claim they
    are maximally different).
    """
    union = len(set1.union(set2))
    if union == 0:
        return 0.0
    intersection = len(set1.intersection(set2))
    return 1 - (intersection / union)


In [54]:
from sklearn.linear_model import BayesianRidge

In [55]:
# Function to apply BayLIME across datasets
def run_baylime_on_datasets(datasets, feature_names_dict):
    """Compute the BayLIME stability score for every dataset.

    For each entry a fresh ``BayesianRidge`` is fitted on the training split
    and used as the model to be explained, whatever the dataset's task type.

    :param datasets: mapping of dataset name to ``(X_train, y_train, X_test)``
    :param feature_names_dict: mapping of dataset name to its feature names
    :return: dict of dataset name to the score returned by
        ``generate_baylime_explanations``
    """
    scores_by_dataset = {}

    for dataset_name, splits in tqdm(datasets.items()):
        train_features, train_targets, test_features = splits

        # Model whose predictions get explained
        explained_model = BayesianRidge()
        explained_model.fit(train_features, train_targets)

        scores_by_dataset[dataset_name] = generate_baylime_explanations(
            train_features,
            train_targets,
            test_features,
            explained_model,
            feature_names_dict[dataset_name],
        )

    return scores_by_dataset

In [56]:
# Datasets: Using the preprocessed datasets from the previous implementation.
# Only Parkinson's is enabled here, so the BayLIME run that follows covers a
# single dataset; the other entries are commented out.
datasets = {
    "Parkinson's": (X_train_parkinsons, y_train_parkinsons, X_test_parkinsons),
    # "Breast Cancer": (X_train_cancer, y_train_cancer, X_test_cancer),
    # "Adult Income": (X_train_adult, y_train_adult, X_test_adult),
    # "Boston": (X_train_boston, y_train_boston, X_test_boston),
    # "Body Fat": (X_train_bodyfat, y_train_bodyfat, X_test_bodyfat)
}

In [57]:
# Feature names for each dataset, keyed by the same display names as `datasets`.
# Only the Parkinson's entry is enabled; the other entries are commented out.
feature_names_dict = {
    "Parkinson's": feature_names_parkinsons,
    # "Breast Cancer": feature_names_cancer,
    # "Adult Income": feature_names_adult,
    # "Boston": feature_names_boston,
    # "Body Fat": feature_names_bodyfat
}


In [58]:
# Running BayLIME on the datasets currently listed in `datasets`
# (only Parkinson's at this point in the notebook).
results_BayLIME = run_baylime_on_datasets(datasets, feature_names_dict)


100%|██████████| 50/50 [11:46<00:00, 14.14s/it]
100%|██████████| 50/50 [00:00<00:00, 85.53it/s]
100%|██████████| 1/1 [11:47<00:00, 707.63s/it]


In [59]:
# Report the BayLIME stability score of every processed dataset with four decimals.
for dataset in results_BayLIME:
    score = results_BayLIME[dataset]
    print(f"Stability Score for BayLIME on {dataset} dataset: {score:.4f}")


Stability Score for BayLIME on Parkinson's dataset: 0.8212


In [60]:
# All five preprocessed datasets, keyed by display name; each value is
# (training features, training targets, test features).
datasets = dict([
    ("Parkinson's", (X_train_parkinsons, y_train_parkinsons, X_test_parkinsons)),
    ("Breast Cancer", (X_train_cancer, y_train_cancer, X_test_cancer)),
    ("Adult Income", (X_train_adult, y_train_adult, X_test_adult)),
    ("Boston", (X_train_boston, y_train_boston, X_test_boston)),
    ("Body Fat", (X_train_bodyfat, y_train_bodyfat, X_test_bodyfat)),
])

In [61]:
# Feature names for each dataset, keyed by the same display names as `datasets`.
feature_names_dict = dict([
    ("Parkinson's", feature_names_parkinsons),
    ("Breast Cancer", feature_names_cancer),
    ("Adult Income", feature_names_adult),
    ("Boston", feature_names_boston),
    ("Body Fat", feature_names_bodyfat),
])


In [62]:
# Running BayLIME on all datasets; this overwrites the earlier single-dataset
# result stored under the same name.
results_BayLIME = run_baylime_on_datasets(datasets, feature_names_dict)


100%|██████████| 50/50 [11:39<00:00, 13.98s/it]
100%|██████████| 50/50 [00:00<00:00, 92.40it/s] 
100%|██████████| 50/50 [45:27<00:00, 54.56s/it]
100%|██████████| 50/50 [00:04<00:00, 10.17it/s]
100%|██████████| 50/50 [4:15:24<00:00, 306.49s/it]
100%|██████████| 50/50 [00:12<00:00,  3.93it/s]
100%|██████████| 50/50 [15:35<00:00, 18.71s/it]/it]
100%|██████████| 50/50 [00:03<00:00, 13.95it/s]
100%|██████████| 50/50 [08:49<00:00, 10.60s/it]/it]
100%|██████████| 50/50 [00:01<00:00, 48.83it/s]
100%|██████████| 5/5 [5:37:22<00:00, 4048.44s/it]  


In [63]:
# Report the BayLIME stability score of each dataset with four decimals.
for dataset in results_BayLIME:
    score = results_BayLIME[dataset]
    print(f"Stability Score for BayLIME on {dataset} dataset: {score:.4f}")


Stability Score for BayLIME on Parkinson's dataset: 0.8201
Stability Score for BayLIME on Breast Cancer dataset: 0.8608
Stability Score for BayLIME on Adult Income dataset: 0.5604
Stability Score for BayLIME on Boston dataset: 0.8260
Stability Score for BayLIME on Body Fat dataset: 0.9085


In [64]:
# Save the results to a CSV: the score dict is wrapped in a list so it becomes
# a single-row frame with one column per dataset, written without the index.
results_df_BayLIME = pd.DataFrame([results_BayLIME])
results_df_BayLIME.to_csv('/home/aditi23010/BML_PROJECT/stability_results_BayLIME.csv', index=False)

print("Results saved to /home/aditi23010/BML_PROJECT/stability_results_BayLIME.csv")

Results saved to /home/aditi23010/BML_PROJECT/stability_results_BayLIME.csv
