In [3]:
import torch.nn as nn
import torch.optim as optim
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
import shap

In [4]:
# Load data
pos = pd.read_csv('/Users/jiaming/Desktop/Lab2/datas/neg.csv') #644
neg = pd.read_csv('/Users/jiaming/Desktop/Lab2/datas/pos.csv') #644

datas = pd.concat([pos,neg])
labels = np.concatenate(([1] * pos.shape[0], [0] * neg.shape[0]), axis=0)

np.random.seed(1)
indices = np.random.permutation(labels.shape[0])

X = datas.iloc[indices].astype('int32')
y = labels[indices].astype('int32')

feature_names = pos.columns

In [5]:
####XGB
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',  
    learning_rate=0.1,
    max_depth=10,
    n_estimators=30)

xgb_model.load_model('/Users/jiaming/Desktop/Lab2/datas/ROC/models/xgb_model.json')

explainer_xgb = shap.TreeExplainer(xgb_model)  
shap_values_xgb = explainer_xgb(X)

fig_xgb = shap.plots.bar(shap_values_xgb, show=False) 

plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_xgb.pdf', bbox_inches='tight')
plt.close(fig_xgb) 



In [24]:
print(shap_values_xgb)

.values =
array([[ 0.07639041, -0.01662542, -0.05849581, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.07639041, -0.01662542, -0.05849581, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.07767974, -0.01955114, -0.05653504, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.07829252, -0.01173415, -0.05307686, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.07301322, -0.01206152, -0.05524357, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.07264336, -0.01878795, -0.058701  , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)

.base_values =
array([0.48345637, 0.48345637, 0.48345637, ..., 0.48345637, 0.48345637,
       0.48345637], dtype=float32)

.data =
array([[ 54,  30, 333, ...,   0,   1,   0],
       [ 43,  30, 252, ...,   0,   1,   0],
       [ 40,  10, 187, ...,   1,   1,   0],
       ...,
       [ 55,   0, 180, ...,   1,   1,   0],
       [ 36,   0, 165, ...,   0,   1,   0]

In [23]:
####LR
from joblib import dump, load
lr_model = LogisticRegression(max_iter=100, random_state=42)
lr_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/lr_model.joblib')

masker = shap.maskers.Independent(data=X)
explainer_lr = shap.LinearExplainer(lr_model, masker)  
shap_values_lr = explainer_lr(X)

fig_lr = shap.plots.bar(shap_values_lr, show=False) 

plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_lr.pdf', bbox_inches='tight')
plt.close(fig_lr) 

In [7]:
####LSTM

# table to numpy
X_np =  X.values 
# numpy to tensor
X_tensor = torch.tensor(X_np).float().unsqueeze(1)
print(X_tensor.shape)

torch.Size([1288, 1, 24])


In [None]:
class BinaryLSTM(nn.Module):
    def __init__(self, input_size=24, hidden_size=256, num_layers=2):
        super(BinaryLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        output = self.linear(lstm_out[:, -1, :])
        output = self.sigmoid(output)
        return output

lstm_model = BinaryLSTM()
lstm_model.load_state_dict(torch.load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/LSTM/LSTM.pth'))

In [47]:
background_data = X_tensor[:100]
explainer_lstm = shap.DeepExplainer(lstm_model, background_data)
shap_values_lstm = explainer_lstm.shap_values(X_tensor) # 1. values
shap_values_lstm = shap_values_lstm.squeeze(1)

lstm_model.eval()
with torch.no_grad():
    predictions = lstm_model(background_data)
expected_value = predictions.numpy() # 2. base_values

# expl = shap.Explanation(
#     values=np.sum(shap_values_lstm, axis=1), 
#     base_values=expected_value,
#     data=X_tensor[:100].numpy(), # 3. data
#     feature_names=datas.columns.tolist()
# )


shap_html = shap.force_plot(
    base_value=explainer_lstm.expected_value,
    shap_values=shap_values_lstm[:100],  # SHAP values for the first 100 predictions
    features=X.iloc[:100],  # Feature values for the first 100 predictions
    feature_names=X.columns
)
shap.save_html('shap_force_plot.html', shap_html)

# fig_lstm = shap.plots.bar(expl, show=False)
# plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_lstm.pdf', bbox_inches='tight')
# plt.close()











