In [3]:
pip install shap matplotlib


Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels
Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Load dataset and derive the target
# ---------------------------------------------------------------------------
# An ATM is labelled 1 ("shutdown") when it has fewer than 200 monthly
# transactions OR has not been used for more than 60 days; otherwise 0.
df = pd.read_csv("atm_pune_dataset.csv")
df["Shutdown_Label"] = ((df["Monthly_Transactions"] < 200) | (df["Last_Used_Days_Ago"] > 60)).astype(int)

# ---------------------------------------------------------------------------
# Build the feature matrix
# ---------------------------------------------------------------------------
# One-hot encode the categorical columns. get_dummies returns a new frame,
# so no defensive copy of `df` is required.
df_model = pd.get_dummies(df, columns=["Location", "Area_Type"], drop_first=True)

# NOTE(review): Monthly_Transactions and Last_Used_Days_Ago are NOT dropped
# here, and the label above is a deterministic function of them, so the model
# (and therefore SHAP) is expected to lean heavily on those two columns.
X = df_model.drop(columns=["ATM_ID", "City", "ATM_Status", "Shutdown_Label"])
y = df_model["Shutdown_Label"]

# Train-test split (stratified so both splits keep the class balance)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Scale features: fit on the training split only, then apply to the test split.
# NOTE(review): the scaler is fitted on the whole training split before the
# cross-validation below, so each CV validation fold has already influenced
# the scaling statistics. Wrapping scaler + KNN in a Pipeline would avoid that.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------------------------------------------------------------
# Tune and fit KNN
# ---------------------------------------------------------------------------
# Odd neighbour counts from 3 to 19, with both weighting schemes, scored by F1.
param_grid = {'n_neighbors': list(range(3, 21, 2)), 'weights': ['uniform', 'distance']}
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='f1', n_jobs=-1)
grid.fit(X_train_scaled, y_train)
best_knn = grid.best_estimator_

# ---------------------------------------------------------------------------
# SHAP with KernelExplainer (CPU-friendly)
# ---------------------------------------------------------------------------
# Row limits keep KernelExplainer's runtime manageable.
N_BACKGROUND = 100  # training rows used as the background/reference sample
N_EXPLAIN = 50      # test rows that get explained and plotted


def _shutdown_probability(features):
    """Return the tuned KNN's predicted probability of class 1 for each row.

    Explaining a probability rather than the hard 0/1 output of ``predict``
    gives SHAP a graded signal to attribute. The result is one value per row,
    so ``shap_values`` below is a single (rows x features) array.
    """
    positive_col = list(best_knn.classes_).index(1)
    return best_knn.predict_proba(features)[:, positive_col]


print("🔍 Running SHAP KernelExplainer...")
background = X_train_scaled[:N_BACKGROUND]
X_explain_scaled = X_test_scaled[:N_EXPLAIN]
# Unscaled counterpart of the explained rows; passed to the plots so they
# carry the DataFrame's column names and original feature values.
X_explain = X_test.iloc[:N_EXPLAIN]

explainer = shap.KernelExplainer(_shutdown_probability, background)
shap_values = explainer.shap_values(X_explain_scaled)  # limited for speed


def _save_summary_plot(path, **plot_kwargs):
    """Draw a SHAP summary plot for the explained rows and save it to ``path``."""
    # show=False so the figure is not displayed before it is saved.
    shap.summary_plot(shap_values, X_explain, show=False, **plot_kwargs)
    plt.tight_layout()
    plt.savefig(path)
    plt.close()


# Summary bar plot
_save_summary_plot("shap_summary_bar.png", plot_type="bar")

# Full summary plot
_save_summary_plot("shap_summary.png")

print("SHAP plots saved: shap_summary_bar.png, shap_summary.png")


🔍 Running SHAP KernelExplainer...


found 0 physical cores < 1
  File "/opt/conda/envs/anaconda-2024.02-py310/lib/python3.10/site-packages/joblib/externals/loky/backend/context.py", line 217, in _count_physical_cores
    raise ValueError(


  0%|          | 0/50 [00:00<?, ?it/s]

SHAP plots saved: shap_summary_bar.png, shap_summary.png
