In [None]:
#imported libraries
import pandas as pd
import shap
from keras.models import load_model
import joblib
import configparser
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil

In [None]:
config = configparser.ConfigParser()

# Read the configuration file.
# ConfigParser.read() does not raise when the file is missing; it returns the
# list of files it managed to parse. Fail here with a clear message instead of
# letting the first config.get() below raise a confusing NoSectionError.
if not config.read('config.ini'):
    raise FileNotFoundError(
        f"'config.ini' was not found or could not be read (working directory: {os.getcwd()})"
    )

# Section of config.ini that holds every setting used by this notebook.
xai = 'XAI_beeswarm'

# Path of the serialized model that is later passed to joblib.load.
lgb_model = config.get(xai, 'lgb_model')

# Input paths, one per '*_f' key; each is later read with pd.read_csv.
full_dos = config.get(xai, 'full_dos_f')
extend_dos = config.get(xai, 'extend_dos_f')
content_shift = config.get(xai, 'content_shift_f')
fgs = config.get(xai, 'fgsm_f')
gamma = config.get(xai, 'gamma_f')

# 'c_'-prefixed counterparts of the inputs above.
# NOTE(review): the meaning of the 'c_' prefix is not visible in this notebook - confirm.
c_full_dos = config.get(xai, 'c_full_dos_f')
c_extend_dos = config.get(xai, 'c_extend_dos_f')
c_content_shift = config.get(xai, 'c_content_shift_f')
c_fgs = config.get(xai, 'c_fgsm_f')
c_gamma = config.get(xai, 'c_gamma_f')

# Directory that receives the generated PNG files. It is wiped and recreated on
# every run so that stale plots from a previous run never survive.
output = config.get(xai, 'output')

# Safety net before the recursive delete: an empty value, '.', or any parent of
# the working directory would make shutil.rmtree destroy the notebook's own tree.
_output_abs = os.path.normcase(os.path.abspath(output))
_cwd_abs = os.path.normcase(os.getcwd())
if _cwd_abs == _output_abs or _cwd_abs.startswith(_output_abs.rstrip(os.sep) + os.sep):
    raise ValueError(
        f"Refusing to delete output directory {output!r}: it resolves to the "
        "working directory or one of its parents"
    )

if os.path.exists(output):
    shutil.rmtree(output)
os.makedirs(output)

In [None]:
def extract_dir_name(path):
    """Return the last component of ``path``, ignoring trailing '/' characters.

    The result is used as the stem of the saved plot's file name. Any file
    extension in the last component is kept as-is.
    """
    return os.path.basename(path.rstrip('/'))
def _positive_class_shap(shap_values):
    """Normalise the output of ``explainer.shap_values`` to one 2-D matrix.

    The container returned for a binary classifier differs between SHAP
    releases, so indexing it blindly with ``[1]`` can silently select a sample
    row instead of a class. Three layouts are accepted:

    * list/tuple of per-class arrays -> the element at index 1 (the only
      element if there is just one);
    * 3-D array, presumably ``(n_samples, n_features, n_outputs)`` -> output 1
      (output 0 if there is just one);
    * 2-D array ``(n_samples, n_features)`` -> returned unchanged.

    Raises
    ------
    ValueError
        If the array has any other number of dimensions.
    """
    if isinstance(shap_values, (list, tuple)):
        chosen = shap_values[1] if len(shap_values) > 1 else shap_values[0]
        return np.asarray(chosen)

    values = np.asarray(shap_values)
    if values.ndim == 3:
        return values[:, :, 1] if values.shape[2] > 1 else values[:, :, 0]
    if values.ndim == 2:
        return values
    raise ValueError(
        f"Unexpected SHAP values shape {values.shape}; expected a list of "
        "per-class arrays, a 2-D array or a 3-D array"
    )


def compute_shap(path, top_n=20):
    """Save a SHAP beeswarm plot of the top-ranked features of one CSV file.

    The CSV at ``path`` is loaded with pandas, explained with a
    ``shap.TreeExplainer`` built on the module-level ``model``, and the
    ``top_n`` features with the largest mean positive-class SHAP value are
    drawn as a beeswarm plot. The image is written to
    ``<output>/<last component of path>.png``.

    Relies on the module-level names ``model`` (the loaded estimator) and
    ``output`` (the destination directory) being defined before the call.

    Parameters
    ----------
    path : str
        Location of the CSV file whose columns are the model's features.
    top_n : int, default 20
        How many features to keep in the plot.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1, or the SHAP output has an unexpected shape.
    """
    # Guard first: a slice of [-0:] would silently select *every* feature.
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}")

    plot_name = extract_dir_name(path)
    features = pd.read_csv(path)

    explainer = shap.TreeExplainer(model)
    class_values = _positive_class_shap(explainer.shap_values(features))

    # Rank features by their mean SHAP value over all rows, largest first.
    # NOTE(review): this is the *signed* mean, so positive and negative
    # contributions cancel out and strongly negative features rank last.
    # Global importance is usually mean(|SHAP|) - confirm the signed ranking
    # is intentional before changing it.
    mean_values = np.mean(class_values, axis=0)
    top_idx = np.argsort(mean_values)[-top_n:][::-1]
    column_names = features.columns.tolist()
    top_names = [column_names[i] for i in top_idx]

    # Starting canvas: 1000x800 px at 200 DPI, expressed in inches for matplotlib.
    # NOTE(review): beeswarm's default plot_size may resize the current figure,
    # so the saved PNG is not guaranteed to keep these dimensions - confirm.
    dpi = 200
    fig, _ = plt.subplots(figsize=(1000 / dpi, 800 / dpi), dpi=dpi)
    try:
        explanation = shap.Explanation(
            values=class_values[:, top_idx],
            data=features.iloc[:, top_idx].values,
            feature_names=top_names,
        )
        # show=False keeps the figure alive so it can be written to disk
        # *before* it is displayed; saving after plt.show() can yield an empty
        # image on some backends.
        shap.plots.beeswarm(explanation, max_display=top_n, show=False)
        plot_path = os.path.join(output, f"{plot_name}.png")
        fig.savefig(plot_path, bbox_inches='tight', facecolor='w', edgecolor='w', dpi=dpi)
        # Still render the plot in the notebook, as the original call did.
        plt.show()
    finally:
        # Always release the figure, even if plotting or saving failed, so a
        # loop over many files does not accumulate open figures.
        plt.close(fig)


In [None]:
# Load the estimator from the file named by the `lgb_model` config entry.
# compute_shap reads it through the module-level name `model`.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code -
# only point `lgb_model` at a trusted file.
model = joblib.load(lgb_model)

# Inputs grouped as (c_* file, matching plain file) and flattened in that order.
path_pairs = [
    csv_path
    for pair in (
        (c_full_dos, full_dos),
        (c_extend_dos, extend_dos),
        (c_content_shift, content_shift),
        (c_fgs, fgs),
        (c_gamma, gamma),
    )
    for csv_path in pair
]

# One beeswarm PNG is produced per input file.
for path in path_pairs:
    compute_shap(path)