### Using SHAP for Feature Drift Analysis
**Description**: Utilize SHapley Additive exPlanations (SHAP) values to analyze feature
importance changes over time, indicating feature drift.

In [3]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import shap
import matplotlib.pyplot as plt

# Simulate historical dataset
np.random.seed(0)
data_1 = pd.DataFrame({
    'feature1': np.random.normal(0, 1, 500),
    'feature2': np.random.normal(5, 2, 500),
    'target': np.random.randint(0, 2, 500)
})

# Simulate recent dataset with drift
data_2 = pd.DataFrame({
    'feature1': np.random.normal(1, 1, 500),
    'feature2': np.random.normal(6, 2.5, 500),
    'target': np.random.randint(0, 2, 500)
})

# Combine and mark source
data_1['source'] = 'historical'
data_2['source'] = 'recent'
combined = pd.concat([data_1, data_2])

# Train model on full dataset to understand feature importance
X = combined[['feature1', 'feature2']]
y = combined['target']
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# SHAP explanation
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# Add SHAP values to data
combined['shap_feature1'] = shap_values[1][:, 0]
combined['shap_feature2'] = shap_values[1][:, 1]

# Compare SHAP values over time to detect feature drift
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
combined.boxplot(column='shap_feature1', by='source')
plt.title('SHAP Value Drift - Feature1')
plt.suptitle('')

plt.subplot(1, 2, 2)
combined.boxplot(column='shap_feature2', by='source')
plt.title('SHAP Value Drift - Feature2')
plt.suptitle('')

plt.tight_layout()
plt.show()


ValueError: Length of values (2) does not match length of index (1000)

In [2]:
!pip install shap

Defaulting to user installation because normal site-packages is not writeable
Collecting shap
  Downloading shap-0.47.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (992 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m992.3/992.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting slicer==0.0.8
  Downloading slicer-0.0.8-py3-none-any.whl (15 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.47.2 slicer-0.0.8

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
