In [1]:
import os
import shap
import matplotlib.pyplot as plt
def explain_shap(model, X, save_folder, model_name, features):
    """Compute SHAP values for ``model`` on ``X`` and save a summary plot.

    The explainer is built with ``shap.Explainer`` around ``model.predict``
    when the model has that method, otherwise around
    ``model.decision_function``. ``X`` is used both as the data handed to the
    explainer constructor and as the samples being explained.

    Args:
        model: Estimator exposing ``predict`` or ``decision_function``.
        X: Samples to explain.
        save_folder: Directory the PNG is written to; created if missing.
        model_name: Prefix of the output file, which is named
            ``{model_name}_summary_plot.png``.
        features: Feature names forwarded to ``shap.summary_plot``.

    Returns:
        Whatever calling the explainer on ``X`` returns.

    Raises:
        ValueError: If ``model`` has neither ``predict`` nor
            ``decision_function``.
    """
    if hasattr(model, 'predict'):
        explainer = shap.Explainer(model.predict, X)
    elif hasattr(model, 'decision_function'):
        explainer = shap.Explainer(model.decision_function, X)
    else:
        raise ValueError("Unsupported model type. Please provide a model with 'predict' or 'decision_function' method.")

    shap_values = explainer(X)

    os.makedirs(save_folder, exist_ok=True)  # Create the folder if it doesn't exist
    summary_plot_path = os.path.join(save_folder, f'{model_name}_summary_plot.png')

    # Start from a fresh figure: the summary plot is drawn with show=False onto
    # the current pyplot figure, so without this a second call in the same cell
    # would paint over the previous model's plot and save the mixture.
    plt.figure()
    shap.summary_plot(shap_values, X, feature_names=features, max_display=20, show=False)

    # bbox_inches='tight' keeps long feature labels from being clipped in the
    # saved image. The figure is left open so a notebook still renders it.
    plt.savefig(summary_plot_path, bbox_inches='tight')

    return shap_values

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [2]:
import pandas as pd

def read_csv_to_dataframe(file_path):
    """Load a CSV file into a pandas DataFrame.

    Any exception raised while reading is caught, reported on stdout and
    turned into a ``None`` result, so callers must check for ``None``.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if reading failed.
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception as err:
        # Best-effort loader: report the problem instead of propagating it.
        print("An error occurred:", err)
        frame = None
    return frame

In [3]:
# Load the modelling dataset (presumably SMOTE-resampled, judging by the file
# name — confirm). read_csv_to_dataframe prints the error and returns None on
# failure, so smoted_df may be None here.
# NOTE(review): absolute, machine-specific path; will not resolve elsewhere.
smoted_df=read_csv_to_dataframe('/home/nalin21478/ML-Flight-Delay-Prediction/Code/smoted_data.csv')

In [4]:
# Show the loaded frame as the cell output for a visual sanity check.
smoted_df

Unnamed: 0,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,DEST,CRS_ELAPSED_TIME,DISTANCE,Temperature,Dew Point,Humidity,Wind,Wind Speed,Wind Gust,Pressure,Condition,sch_dep,sch_arr,DEP_DELAY,Delayed
0,11,1,5,10,-0.848855,-0.711515,0.761435,0.184485,-0.038486,15,2.069328,2.821704,-0.727985,3,-2.330480,-1.390801,-1,0
1,11,1,5,28,1.217334,1.354774,0.761435,0.184485,-0.038486,15,2.069328,2.821704,-0.727985,3,-2.330480,-1.390801,-7,0
2,11,1,5,20,-0.372043,-0.224999,0.761435,0.184485,-0.038486,15,2.069328,2.821704,-0.727985,3,-2.330480,-1.390801,40,1
3,11,1,5,30,-0.480789,-0.365448,0.761435,0.184485,-0.038486,15,2.069328,2.821704,-0.727985,3,-2.330480,-1.390801,-2,0
4,11,1,5,1,-0.723378,-0.572189,0.494669,0.002399,-0.038486,15,1.907431,2.563372,-0.555007,3,-2.330480,-1.390801,-4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46877,12,17,2,40,1.226914,1.298095,-1.105928,0.002399,1.404212,7,0.438971,-0.450508,-1.281515,0,-0.429503,0.064138,18,1
46878,1,10,5,42,0.918772,0.872098,0.602231,0.895527,1.038694,15,-0.098586,-0.450508,1.222024,0,-0.599080,-1.269124,25,1
46879,11,3,7,32,0.455708,0.366153,1.161585,-0.315601,-0.716097,15,-0.359121,-0.450508,0.274794,15,-0.010076,0.243411,40,1
46880,12,30,1,13,-0.698283,-0.778298,0.104635,0.598274,1.080887,17,-0.459632,-0.450508,-1.399890,15,-0.479051,0.416960,42,1


In [5]:
# Remove the raw departure delay before modelling.
# NOTE(review): presumably 'Delayed' is derived from DEP_DELAY, so keeping it
# would leak the target into the features — confirm.
# errors='ignore' makes the cell safe to re-run: without it a second execution
# raises KeyError because the column has already been dropped.
smoted_df = smoted_df.drop(columns=['DEP_DELAY'], errors='ignore')

In [6]:
# List the columns that remain after DEP_DELAY was dropped.
smoted_df.columns

Index(['MONTH', 'DAY_OF_MONTH', 'DAY_OF_WEEK', 'DEST', 'CRS_ELAPSED_TIME',
       'DISTANCE', 'Temperature', 'Dew Point', 'Humidity', 'Wind',
       'Wind Speed', 'Wind Gust', 'Pressure', 'Condition', 'sch_dep',
       'sch_arr', 'Delayed'],
      dtype='object')

In [7]:
from sklearn.model_selection import train_test_split
# Feature matrix: every column except the last one, as a NumPy array.
Independent_features = smoted_df.iloc[:, :-1].to_numpy()
# Target vector: the last column ('Delayed' according to the columns listing).
dependent_feature = smoted_df.iloc[:, -1].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
(X_train, X_test, y_train, y_test) = train_test_split(
    Independent_features,
    dependent_feature,
    test_size=0.2,
    random_state=0,
)

In [8]:
# Names of the model inputs, in the same order as the columns of the feature
# matrix (all DataFrame columns except the trailing 'Delayed' target).
features = [
    "MONTH",
    "DAY_OF_MONTH",
    "DAY_OF_WEEK",
    "DEST",
    "CRS_ELAPSED_TIME",
    "DISTANCE",
    "Temperature",
    "Dew Point",
    "Humidity",
    "Wind",
    "Wind Speed",
    "Wind Gust",
    "Pressure",
    "Condition",
    "sch_dep",
    "sch_arr",
]

In [9]:
# Output directory for SHAP plots; passed to explain_shap as save_folder.
# NOTE(review): absolute, machine-specific path; will not resolve elsewhere.
folder="/home/nalin21478/ML-Flight-Delay-Prediction/plots/SHAP"

In [10]:
from sklearn.ensemble import GradientBoostingClassifier

import xgboost as xgb
from sklearn.metrics import accuracy_score

print("XGBoost CLASSIFIER \n")

# Train an XGBoost classifier on the training split; the seed is fixed for repeatability.
xgb_classifier = xgb.XGBClassifier(random_state=0)
xgb_classifier.fit(X_train, y_train)

# Explain the held-out rows and write the summary plot into `folder`.
# The recorded run shows a permutation explainer working through all 9377 test
# rows at roughly 7 it/s, so expect this call to take a long time.
shap_values = explain_shap(
    model=xgb_classifier,
    X=X_test,
    save_folder=folder,
    model_name='XGBoostClassifier',
    features=features,
)





XGBoost CLASSIFIER 



Permutation explainer:  23%|██▎       | 2190/9377 [05:02<16:22,  7.31it/s]