In [1]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
from keras import Sequential
from keras.layers import Dense, BatchNormalization, Dropout, TimeDistributed, LSTM, LayerNormalization
from keras import regularizers
from keras.models import load_model
from scipy.stats import mode

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
def load_dataset(filename):
    """Read a CSV of messages, encode and scale it, and return a train/test split.

    Processing steps, in order:

    1. Rows containing an empty or missing field are dropped.
    2. The ``Flag`` column is encoded as ``"R" -> 0`` and ``"T" -> 1``.
    3. The columns at positions 1 and 3..10 are parsed as hexadecimal
       strings into integers.
    4. The nine feature columns are standardised with ``StandardScaler``.
    5. The rows are split 80/20 with a fixed ``random_state`` of 5.

    Parameters
    ----------
    filename : str or path-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Scaled features with columns ``ID`` and ``Data1`` .. ``Data8``.
        They keep the row labels of the cleaned input frame.
    y_train, y_test : pandas.DataFrame
        Single float column ``Flag``. These are rebuilt from NumPy arrays and
        therefore carry a fresh ``RangeIndex``; they match the feature frames
        by position, not by index label.
    """
    feature_cols = ['ID', 'Data1', 'Data2', 'Data3', 'Data4', 'Data5', 'Data6', 'Data7', 'Data8']

    df = pd.read_csv(filename)

    # Treat empty strings as missing values, then drop every incomplete row.
    df.replace("", float("NaN"), inplace=True)
    df.dropna(inplace=True)

    # Encode the labels by assigning the result back to the column. The
    # previous chained form, df['Flag'].replace(..., inplace=True), acts on a
    # temporary Series under pandas Copy-on-Write and leaves the frame
    # unchanged, so the labels would stay as "R"/"T" strings.
    df['Flag'] = df['Flag'].replace({"R": 0, "T": 1})

    # Parse the hexadecimal string columns into integers. Columns are picked
    # by position, as before, but each one is replaced as a whole so the
    # result does not depend on writing ints into a string-typed column.
    # NOTE(review): positions 1 and 3..10 are presumably the same columns as
    # `feature_cols` - confirm against the CSV header.
    hex_positions = [1, *range(3, 11)]
    for column in df.columns[hex_positions]:
        df[column] = df[column].apply(int, base=16)

    X = df.loc[:, feature_cols].values
    y = df['Flag'].values

    # NOTE(review): the scaler is fitted on all rows before the split, so
    # test-set statistics leak into the training features. Left unchanged
    # because previously saved models were presumably trained on features
    # scaled this way - confirm before moving the fit after the split.
    scaler = StandardScaler()
    scaled_df = pd.DataFrame(scaler.fit_transform(X), index=df.index, columns=feature_cols)

    X_train, X_test, y_train, y_test = train_test_split(scaled_df, y, test_size=0.20, random_state=5)

    # The feature frames are already float DataFrames with the right columns;
    # only the label arrays need converting and wrapping.
    y_train = pd.DataFrame(y_train.astype(float), columns=['Flag'])
    y_test = pd.DataFrame(y_test.astype(float), columns=['Flag'])

    return X_train, X_test, y_train, y_test

In [3]:
def ann(self, X_train, y_train, modelname):
    """Build, train and save a small feed-forward binary classifier.

    The network takes 9 inputs and has two 16-unit ReLU hidden layers, each
    followed by batch normalisation and 20% dropout, then a single sigmoid
    output unit. Both hidden layers use He-uniform initialisation and an L2
    kernel penalty of 1e-4.

    Parameters
    ----------
    self : unused
        Not read anywhere in the body; kept so existing calls keep working.
    X_train, y_train :
        Training features and labels, handed directly to ``fit``.
    modelname :
        Destination passed to ``save``.

    Returns
    -------
    None
        The trained model is only written to ``modelname``.
    """
    hidden_options = dict(
        activation='relu',
        kernel_initializer='he_uniform',
    )

    # Layers are created in the same order in which they are stacked.
    layers = [
        Dense(16, input_dim=9, kernel_regularizer=regularizers.l2(0.0001), **hidden_options),
        BatchNormalization(),
        Dropout(0.2),
        Dense(16, kernel_regularizer=regularizers.l2(0.0001), **hidden_options),
        BatchNormalization(),
        Dropout(0.2),
        Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'),
    ]
    network = Sequential(layers)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    # Train for 10 epochs in mini-batches of 128, then persist the result.
    network.fit(X_train, y_train, epochs=10, batch_size=128)
    network.save(modelname)
    print("Done Training!")

In [4]:

# Evaluate the previously saved ANN on the held-out 20% split.
from sklearn import metrics

X_train, X_test, y_train, y_test = load_dataset('dos_cleaned.csv')

model_ann = load_model('simpleann/AJ_dos_model')

# Keep the raw model scores separate from the hard class labels.
# NOTE(review): assumes the saved model ends in a single sigmoid unit (as the
# network built by ann() does), so scores lie in [0, 1] - confirm.
y_prob = model_ann.predict(X_test)

# Threshold at 0.5. Casting the scores with astype(int) truncates toward zero,
# which labels every score below exactly 1.0 as class 0.
y_pred = (y_prob > 0.5).astype(int)

print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
print("Precision: ", metrics.precision_score(y_test, y_pred))
print("Recall: ", metrics.recall_score(y_test, y_pred))
# ROC-AUC ranks the continuous scores; hard labels collapse the curve to a
# single operating point.
print("Area Under Curve: ", metrics.roc_auc_score(y_test, y_prob))

# `tp` holds the full 2x2 confusion matrix (name kept for later cells).
# labels=[0, 1] fixes the layout so the 4-way unpacking below still works
# when only one class is present.
tp = metrics.confusion_matrix(y_test, y_pred, labels=[0, 1])
print(tp)

t_TN, t_FP, t_FN, t_TP = tp.ravel()
print("True Positive:",t_TP)
print("False Positive:",t_FP)
print("True Negative: ",t_TN)
print("False Negative: ",t_FN)
# False-positive rate, if needed: round(t_FP / (t_FP + t_TN), 3)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0
Area Under Curve:  1.0
[[615489      0]
 [     0 117666]]
True Positive: 117666
False Positive: 0
True Negative:  615489
False Negative:  0


In [42]:
# BEGIN TESTING FASTSHAP
import fastshap
from fastshap.plotting import plot_variable_effect_on_output

# Number of leading test rows handed to the explainer as its background set,
# i.e. the sample of "typical" feature values (the background distribution).
n_background_rows = 20

# The explainer receives the model's prediction function and the background rows.
ke = fastshap.KernelExplainer(
    model_ann.predict,
    X_test.iloc[:n_background_rows, :],
)



In [44]:
# Test-set rows (positions 100..109) to compute SHAP values for. The window is
# kept as a slice object so each use below still takes a fresh selection.
explain_window = slice(100, 110)

# Stratify the background set into 2 parts and compute against fold 0 only.
# NOTE(review): exact fold semantics are defined by fastshap - see its docs.
ke.stratify_background_set(2)
sv = ke.calculate_shap_values(
    X_test.iloc[explain_window, :],
    background_fold_to_use=0,
    verbose=False,
)

# Plot the effect of "ID" on the model output for the same rows.
plot_variable_effect_on_output(
    sv,
    X_test.iloc[explain_window, :],
    variable="ID",
    interaction_variable="auto",
)



  warn(
