# Example: using `ExplainAnomaliesUsingSHAP` on the Kaggle credit card fraud dataset (https://www.kaggle.com/mlg-ulb/creditcardfraud)

In [None]:
from ExplainAnomaliesUsingSHAP import ExplainAnomaliesUsingSHAP
import pandas as pd

# Normalization of data

In [2]:
# Load the raw CSV and discard the 'Time' column in one step.
df = pd.read_csv('../data/creditcard.csv', delimiter=',').drop(['Time'], axis=1)

# Min-max scale every column except the last one (the label column) to [0, 1].
feature_columns = df.columns[:-1]
for name in feature_columns:
    lowest, highest = df[name].min(), df[name].max()
    if lowest == highest:
        # Constant column: leave it untouched rather than divide by zero.
        continue
    df[name] = (df[name] - lowest) / (highest - lowest)

df.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.935192,0.76649,0.881365,0.313023,0.763439,0.267669,0.266815,0.786444,0.475312,0.5106,...,0.561184,0.522992,0.663793,0.391253,0.585122,0.394557,0.418976,0.312697,0.005824,0
1,0.978542,0.770067,0.840298,0.271796,0.76612,0.262192,0.264875,0.786298,0.453981,0.505267,...,0.55784,0.480237,0.666938,0.33644,0.58729,0.446013,0.416345,0.313423,0.000105,0
2,0.935217,0.753118,0.868141,0.268766,0.762329,0.281122,0.270177,0.788042,0.410603,0.513018,...,0.565477,0.54603,0.678939,0.289354,0.559515,0.402727,0.415489,0.311911,0.014739,0
3,0.941878,0.765304,0.868484,0.213661,0.765647,0.275559,0.266803,0.789434,0.414999,0.507585,...,0.559734,0.510277,0.662607,0.223826,0.614245,0.389197,0.417669,0.314371,0.004807,0
4,0.938617,0.77652,0.864251,0.269796,0.762975,0.263984,0.268968,0.782484,0.49095,0.524303,...,0.561327,0.547271,0.663392,0.40127,0.566343,0.507497,0.420561,0.31749,0.002724,0


# Split into train (Class 0) and test (Class 1) sets

In [3]:
# Features are every column except the last; the last column is the label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

print('x shape:', X.shape)
print('y shape:', y.shape)
print(y.value_counts())

# Rows labelled 0 form the training set and rows labelled 1 form the set
# that is later passed to the explainer as `x_explain`.
normal_mask = y == 0
anomaly_mask = y == 1

# Index *labels* of each group, kept under their original names for any
# later cell that refers to them.
train_idx = y[normal_mask].index.values
test_idx = y[anomaly_mask].index.values

# Select with boolean masks through `.loc` so X and y are always sliced the
# same way. The previous code fed index labels to the positional `.iloc`,
# which is only correct while `df` has a default 0..n-1 RangeIndex.
X_train = X.loc[normal_mask]
y_train = y.loc[normal_mask]

X_test = X.loc[anomaly_mask]
y_test = y.loc[anomaly_mask]

x shape: (284807, 29)
y shape: (284807,)
0    284315
1       492
Name: Class, dtype: int64


# Get explanations

In [4]:
# Build the explainer object used by the cells below.
# NOTE(review): num_anomalies_to_explain=10 presumably caps how many rows of
# `x_explain` get an explanation -- confirm against ExplainAnomaliesUsingSHAP.
exp_model = ExplainAnomaliesUsingSHAP(num_anomalies_to_explain=10)

In [5]:
# Run the explainer: X_train (label-0 rows) is passed as the training data
# and X_test (label-1 rows) as the rows to explain. The captured output shows
# a Keras model being summarised and trained during this call.
# NOTE(review): return_shap_values=True presumably attaches a numeric value to
# each reported feature -- confirm against ExplainAnomaliesUsingSHAP.
all_sets_explaining_features = exp_model.explain_unsupervised_data(
    x_train=X_train,
    x_explain=X_test,
    return_shap_values=True,
)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 29)]              0         
_________________________________________________________________
dense (Dense)                (None, 14)                420       
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 105       
_________________________________________________________________
dense_2 (Dense)              (None, 14)                112       
_________________________________________________________________
dense_3 (Dense)              (None, 29)                435       
Total params: 1,072
Trainable params: 1,072
Non-trainable params: 0
_________________________________________________________________
Train on 255883 samples, validate on 28432 samples
Epoch 1/10
255883/255883 - 8s - loss: 0.0024 - mse: 0.0023 - val_loss: 0.0

In [6]:
# Display the result. In the captured output it is a dict that maps an integer
# key to a list of (feature name, value) tuples.
# NOTE(review): the keys look like row index labels of X_test, and -1 looks
# like a placeholder value -- confirm both against the explainer.
all_sets_explaining_features

{154587: [('V4', -1),
  ('V11', 0.0731265756822315),
  ('V15', 0.02369090910031154),
  ('V7', 0.016886018593518293),
  ('V3', 0.016570416382713796),
  ('V13', 0.015665877182100774),
  ('V10', 0.014052846919758964),
  ('V18', 0.00019320900654557518),
  ('V1', 0.00017568476566009978),
  ('V17', 0.00017336466464923974),
  ('V9', 0.0006888261077363056),
  ('V21', 0.0020651601972084717),
  ('V12', 0.0018995651805052225),
  ('V6', 0.001422672056074865),
  ('V8', -1)],
 154684: [('V4', -1),
  ('V11', 0.06857125766422263),
  ('V15', 0.024017587211378415),
  ('V13', 0.01748191127094885),
  ('V7', 0.01707902271076717),
  ('V3', 0.01379392178383141),
  ('V10', 0.01240802991961949),
  ('V1', -1),
  ('V18', 0.0025376704074176696),
  ('V9', 0.000837353113461579),
  ('V17', 0.00358466848771998),
  ('V12', 0.002072845369657088),
  ('V21', 0.0020574753036938336),
  ('V22', 0.00046080208520149593),
  ('V8', -1),
  ('V24', 0.0003597983812910178)],
 154371: [('V4', -1),
  ('V11', 0.08294142890489672),
  (