In [4]:
import torch.nn as nn
import torch.optim as optim
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import shap
from joblib import dump, load

In [None]:
loaded_datasets_info = torch.load('/Users/jiaming/Desktop/Lab2/datas/saved_datasets_scaled.pth')
loaded_train_dataset = loaded_datasets_info['train_dataset']
loaded_val_dataset = loaded_datasets_info['val_dataset']
loaded_test_dataset = loaded_datasets_info['test_dataset']

In [None]:
from torch.utils.data import DataLoader

def extract_features_labels_from_subset(subset):
    
    loader = DataLoader(subset, batch_size=len(subset))
    
    for features, labels in loader:
        features = features.squeeze(1).numpy()
        labels = labels.squeeze(1).numpy()
        return features, labels

X_train, y_train = extract_features_labels_from_subset(loaded_train_dataset)
X_val, y_val = extract_features_labels_from_subset(loaded_val_dataset)
X_test, y_test = extract_features_labels_from_subset(loaded_test_dataset)

In [6]:
# Load data
pos = pd.read_csv('/Users/jiaming/Desktop/Lab2/datas/neg.csv') #644
neg = pd.read_csv('/Users/jiaming/Desktop/Lab2/datas/pos.csv') #644

datas = pd.concat([pos,neg])
labels = np.concatenate(([1] * pos.shape[0], [0] * neg.shape[0]), axis=0)

np.random.seed(1)
indices = np.random.permutation(labels.shape[0])

X = datas.iloc[indices].astype('int32')
y = labels[indices].astype('int32')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 123)

feature_names = pos.columns

# 4ML

## 1. LR

In [84]:
# lr_model = LogisticRegression(max_iter=100, random_state=42)
lr_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/lr_model.joblib')

masker = shap.maskers.Independent(data=X)
explainer_lr = shap.LinearExplainer(lr_model, masker)  
shap_values_lr = explainer_lr(X)

fig1_lr = shap.plots.bar(shap_values_lr, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_lr.pdf', bbox_inches='tight')
plt.close(fig1_lr) 

fig2_lr = shap.plots.beeswarm(shap_values_lr, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_lr.pdf', bbox_inches='tight')
plt.close(fig2_lr) 

No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored


In [85]:
print(shap_values_lr)

.values =
array([[ 1.38595786e+00,  6.68707925e+00, -5.70666404e+00, ...,
         5.42907943e-05, -1.82264160e-03, -1.82264160e-03],
       [-4.76143589e+00,  6.68707925e+00, -5.25184528e-01, ...,
         5.42907943e-05, -1.82264160e-03, -1.82264160e-03],
       [-6.43799782e+00,  7.21798920e-01,  3.63279286e+00, ...,
        -8.85797170e-05, -1.82264160e-03, -1.82264160e-03],
       ...,
       [ 1.94481184e+00, -2.26084124e+00,  4.08057503e+00, ...,
        -8.85797170e-05, -1.82264160e-03, -1.82264160e-03],
       [-8.67341373e+00, -2.26084124e+00,  5.04010828e+00, ...,
         5.42907943e-05, -1.82264160e-03, -1.82264160e-03],
       [-3.64372793e+00,  7.21798920e-01,  2.16150855e+00, ...,
         5.42907943e-05, -1.82264160e-03, -1.82264160e-03]])

.base_values =
array([62.75038131, 62.75038131, 62.75038131, ..., 62.75038131,
       62.75038131, 62.75038131])

.data =
array([[ 54,  30, 333, ...,   0,   1,   0],
       [ 43,  30, 252, ...,   0,   1,   0],
       [ 40,  10, 187,

## 2. NB

In [86]:
# nb_model = GaussianNB()
# nb_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/nb_model.joblib')
nb_model.fit(X, y)

explainer_nb = shap.KernelExplainer(nb_model.predict_proba, shap.kmeans(X, 10))  
shap_values_nb = explainer_nb(X)

shap_values_nb_reconstructed = shap.Explanation(values=shap_values_nb.values[:,:,0], 
                                             base_values=explainer_nb.expected_value[0], 
                                             data=X, 
                                             feature_names=feature_names)

fig1_nb = shap.plots.bar(shap_values_nb_reconstructed, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_nb.pdf', bbox_inches='tight')
plt.close(fig1_nb) 

fig2_nb = shap.plots.beeswarm(shap_values_nb_reconstructed, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_nb.pdf', bbox_inches='tight')
plt.close(fig2_nb) 



  0%|          | 0/1288 [00:00<?, ?it/s]

No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored


## 3. RF

In [87]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model = load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/rf_model.joblib')

explainer_rf = shap.TreeExplainer(rf_model)
shap_values_rf = explainer_rf(X)

shap_values_rf_reconstructed = shap.Explanation(values=shap_values_rf.values[:,:,0], 
                                             base_values=explainer_rf.expected_value[0], 
                                             data=X, 
                                             feature_names=feature_names)

fig1_rf = shap.plots.bar(shap_values_rf_reconstructed, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_rf.pdf', bbox_inches='tight')
plt.close(fig1_rf) 

fig2_rf = shap.plots.beeswarm(shap_values_rf_reconstructed, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_rf.pdf', bbox_inches='tight')
plt.close(fig2_rf) 

No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored


## 4. XGB

In [89]:
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',  
    learning_rate=0.1,
    max_depth=10,
    n_estimators=30)
xgb_model.load_model('/Users/jiaming/Desktop/Lab2/datas/ROC/models/xgb_model.json') 

explainer_xgb = shap.Explainer(xgb_model)
shap_values_xgb = explainer_xgb(X)

fig1_xgb = shap.plots.bar(shap_values_xgb, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_xgb.pdf', bbox_inches='tight')
plt.close(fig1_xgb) 

fig2_xgb = shap.plots.beeswarm(shap_values_xgb, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_xgb.pdf', bbox_inches='tight')
plt.close(fig2_xgb) 

No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored


# 2DL

## 1. LSTM

In [107]:
# table to numpy
X_np =  X.values 
# numpy to tensor
X_tensor = torch.tensor(X_np).float().unsqueeze(1)
print(X_tensor.shape)

torch.Size([1288, 1, 24])


In [108]:
class BinaryLSTM(nn.Module):
    def __init__(self, input_size=24, hidden_size=256, num_layers=2):
        super(BinaryLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        output = self.linear(lstm_out[:, -1, :])
        output = self.sigmoid(output)
        return output

lstm_model = BinaryLSTM()
lstm_model.load_state_dict(torch.load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/LSTM/LSTM.pth'))

<All keys matched successfully>

In [None]:
explainer_lstm = shap.DeepExplainer(lstm_model, X_tensor)
shap_values_lstm = explainer_lstm.shap_values(X_tensor)

lstm_model.eval()
with torch.no_grad():
    predictions = lstm_model(background_data)
expected_value = predictions.numpy() # 2. base_values

shap_values_lstm_reconstructed = shap.Explanation(values=shap_values_lstm.squeeze(1), 
                                             base_values=expected_value[0], 
                                             data=X, 
                                             feature_names=feature_names)

fig1_lstm = shap.plots.bar(shap_values_lstm_reconstructed, show=False)
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_lstm.pdf', bbox_inches='tight')
plt.close(fig1_lstm)

fig2_lstm = shap.plots.beeswarm(shap_values_lstm_reconstructed, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_lstm.pdf', bbox_inches='tight')
plt.close(fig2_lstm) 

In [109]:
background_data = X_tensor[:100]
explainer_lstm = shap.DeepExplainer(lstm_model, background_data)
shap_values_lstm = explainer_lstm.shap_values(X_tensor) # 1. values

lstm_model.eval()
with torch.no_grad():
    predictions = lstm_model(background_data)
expected_value = predictions.numpy() # 2. base_values

shap_values_lstm_reconstructed = shap.Explanation(values=shap_values_lstm.squeeze(1), 
                                             base_values=expected_value[0], 
                                             data=X, 
                                             feature_names=feature_names)

fig1_lstm = shap.plots.bar(shap_values_lstm_reconstructed, show=False)
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_lstm.pdf', bbox_inches='tight')
plt.close(fig1_lstm)

fig2_lstm = shap.plots.beeswarm(shap_values_lstm_reconstructed, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_lstm.pdf', bbox_inches='tight')
plt.close(fig2_lstm) 

Using non-full backward hooks on a Module that does not return a single Tensor or a tuple of Tensors is deprecated and will be removed in future versions. This hook will be missing some of the grad_output. Please use register_full_backward_hook to get the documented behavior.
Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.














No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored


In [110]:
shap_html = shap.force_plot(
    base_value=expected_value[0], 
    shap_values=shap_values_lstm.squeeze(1)[:100],  # SHAP values for the first 100 predictions
    features=X.iloc[:100],  # Feature values for the first 100 predictions
    feature_names=X.columns
)
shap.save_html('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_lstm_force_plot.html', shap_html)

## 2. ResNet

In [111]:
# table to numpy
X_np =  X.values 
# numpy to tensor
X_tensor = torch.tensor(X_np).float().unsqueeze(1)
print(X_tensor.shape)

torch.Size([1288, 1, 24])


In [112]:
learning_rate = 0.001
size = 8
class Net_conv(torch.nn.Module):
    def __init__(self, input_length):
        super(Net_conv, self).__init__()
        self.block_1 = torch.nn.Sequential(
            torch.nn.Conv1d(in_channels=1,
                            out_channels=size,
                            kernel_size=1,
                            stride=1,
                            padding=0),
            torch.nn.BatchNorm1d(size),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv1d(in_channels=size,
                                out_channels=2*size,
                                kernel_size=3,
                                stride=1,
                                padding=1),
            torch.nn.BatchNorm1d(2*size)
        )
        self.block_2 = torch.nn.Sequential(
            torch.nn.Conv1d(in_channels=2*size,
                            out_channels=4*size,
                            kernel_size=1,
                            stride=1,
                            padding=0),
            torch.nn.BatchNorm1d(4*size),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv1d(in_channels=4*size,
                                out_channels=2*size,
                                kernel_size=3, 
                                stride=1,
                                padding=1),
            torch.nn.BatchNorm1d(2*size)
        )
        iutput_size_block_1 = (input_length - 1 + 2 * 0) // 1 + 1  
        output_size_block_2 = (iutput_size_block_1 - 1 + 2 * 0) // 1 + 1  
        num_channels_last_layer = 2*size 
        linear_input_size = num_channels_last_layer * output_size_block_2    
        self.linear_1 = torch.nn.Linear(linear_input_size, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        shortcut = x.float()
        x = self.block_1(x)
        x = torch.nn.functional.relu(x + shortcut)    
        shortcut = x
        x = self.block_2(x)
        x = torch.nn.functional.relu(x + shortcut)     
        x = x.view(x.size(0), -1)
        x =  self.linear_1(x)
        x = self.sigmoid(x)
        return x

resnet_model = Net_conv(input_length = 24)
resnet_model.load_state_dict(torch.load('/Users/jiaming/Desktop/Lab2/datas/ROC/models/ResNet/ResNet.pth'))

<All keys matched successfully>

In [124]:
explainer_resnet = shap.DeepExplainer(resnet_model, X_tensor)
shap_values_resnet = explainer_resnet.shap_values(X_tensor)

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


In [131]:

print(shap_values_resnet.values)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [129]:
fig1_resnet = shap.plots.bar(shap_values_resnet, show=False)
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig1_resnet.pdf', bbox_inches='tight')
plt.close(fig1_resnet)

fig2_resnet = shap.plots.beeswarm(shap_values_resnet, show=False) 
plt.savefig('/Users/jiaming/Desktop/Lab2/datas/SHAP/shap_fig2_resnet.pdf', bbox_inches='tight')
plt.close(fig2_resnet) 

DimensionError: The shape of the shap_values matrix does not match the shape of the provided data matrix.