# Import

In [1]:
# Mount Google Drive at /content/drive; every dataset, model and result path used
# in the later cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter with no category or module restriction, so
# deprecation and numerical warnings raised by later cells are hidden as well —
# consider narrowing it once the noisy source is known.
import warnings
warnings.filterwarnings('ignore')

In [3]:
!pip install shap

Collecting shap
  Downloading shap-0.41.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (572 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.6/572.6 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting slicer==0.0.7 (from shap)
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.41.0 slicer-0.0.7


In [4]:
import shap
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Load trainset

In [5]:
# Read the training set CSV from Drive and print its schema
# (column names, non-null counts and dtypes).
df = pd.read_csv('/content/drive/MyDrive/Dataset/insdn/multi/train.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 385156 entries, 0 to 385155
Data columns (total 77 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Flow Duration      385156 non-null  float64
 1   Tot Fwd Pkts       385156 non-null  float64
 2   Tot Bwd Pkts       385156 non-null  float64
 3   TotLen Fwd Pkts    385156 non-null  float64
 4   TotLen Bwd Pkts    385156 non-null  float64
 5   Fwd Pkt Len Max    385156 non-null  float64
 6   Fwd Pkt Len Min    385156 non-null  float64
 7   Fwd Pkt Len Mean   385156 non-null  float64
 8   Fwd Pkt Len Std    385156 non-null  float64
 9   Bwd Pkt Len Max    385156 non-null  float64
 10  Bwd Pkt Len Min    385156 non-null  float64
 11  Bwd Pkt Len Mean   385156 non-null  float64
 12  Bwd Pkt Len Std    385156 non-null  float64
 13  Flow Byts/s        385156 non-null  float64
 14  Flow Pkts/s        385156 non-null  float64
 15  Flow IAT Mean      385156 non-null  float64
 16  Fl

In [6]:
# Remove the 'Label' column so df holds only the feature columns, then print the schema.
# errors='ignore' keeps this cell idempotent: re-running it after the column has already
# been dropped is a no-op instead of raising KeyError.
df = df.drop(columns=['Label'], errors='ignore')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 385156 entries, 0 to 385155
Data columns (total 76 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Flow Duration      385156 non-null  float64
 1   Tot Fwd Pkts       385156 non-null  float64
 2   Tot Bwd Pkts       385156 non-null  float64
 3   TotLen Fwd Pkts    385156 non-null  float64
 4   TotLen Bwd Pkts    385156 non-null  float64
 5   Fwd Pkt Len Max    385156 non-null  float64
 6   Fwd Pkt Len Min    385156 non-null  float64
 7   Fwd Pkt Len Mean   385156 non-null  float64
 8   Fwd Pkt Len Std    385156 non-null  float64
 9   Bwd Pkt Len Max    385156 non-null  float64
 10  Bwd Pkt Len Min    385156 non-null  float64
 11  Bwd Pkt Len Mean   385156 non-null  float64
 12  Bwd Pkt Len Std    385156 non-null  float64
 13  Flow Byts/s        385156 non-null  float64
 14  Flow Pkts/s        385156 non-null  float64
 15  Flow IAT Mean      385156 non-null  float64
 16  Fl

# Load adv samples

In [7]:
# Ordered names of the 76 feature columns. The order matters: this list is assigned
# positionally as column labels to the adversarial-sample frame and the SHAP-value frame.
feature_list = [
    "Flow Duration",
    "Tot Fwd Pkts",
    "Tot Bwd Pkts",
    "TotLen Fwd Pkts",
    "TotLen Bwd Pkts",
    "Fwd Pkt Len Max",
    "Fwd Pkt Len Min",
    "Fwd Pkt Len Mean",
    "Fwd Pkt Len Std",
    "Bwd Pkt Len Max",
    "Bwd Pkt Len Min",
    "Bwd Pkt Len Mean",
    "Bwd Pkt Len Std",
    "Flow Byts/s",
    "Flow Pkts/s",
    "Flow IAT Mean",
    "Flow IAT Std",
    "Flow IAT Max",
    "Flow IAT Min",
    "Fwd IAT Tot",
    "Fwd IAT Mean",
    "Fwd IAT Std",
    "Fwd IAT Max",
    "Fwd IAT Min",
    "Bwd IAT Tot",
    "Bwd IAT Mean",
    "Bwd IAT Std",
    "Bwd IAT Max",
    "Bwd IAT Min",
    "Fwd PSH Flags",
    "Bwd PSH Flags",
    "Fwd URG Flags",
    "Bwd URG Flags",
    "Fwd Header Len",
    "Bwd Header Len",
    "Fwd Pkts/s",
    "Bwd Pkts/s",
    "Pkt Len Min",
    "Pkt Len Max",
    "Pkt Len Mean",
    "Pkt Len Std",
    "Pkt Len Var",
    "FIN Flag Cnt",
    "SYN Flag Cnt",
    "RST Flag Cnt",
    "PSH Flag Cnt",
    "ACK Flag Cnt",
    "URG Flag Cnt",
    "CWE Flag Count",
    "ECE Flag Cnt",
    "Down/Up Ratio",
    "Pkt Size Avg",
    "Fwd Seg Size Avg",
    "Bwd Seg Size Avg",
    "Fwd Byts/b Avg",
    "Fwd Pkts/b Avg",
    "Fwd Blk Rate Avg",
    "Bwd Byts/b Avg",
    "Bwd Pkts/b Avg",
    "Bwd Blk Rate Avg",
    "Subflow Fwd Pkts",
    "Subflow Fwd Byts",
    "Subflow Bwd Pkts",
    "Subflow Bwd Byts",
    "Init Fwd Win Byts",
    "Init Bwd Win Byts",
    "Fwd Act Data Pkts",
    "Fwd Seg Size Min",
    "Active Mean",
    "Active Std",
    "Active Max",
    "Active Min",
    "Idle Mean",
    "Idle Std",
    "Idle Max",
    "Idle Min",
]

In [8]:
# Drive location of the CSV holding the PGD adversarial samples.
pgd_path = '/content/drive/MyDrive/Dataset/insdn/adv/pgd/adv_pgd_multi_target.csv'
# Disabled: path of a separate PGD label file; the cell that would read it is commented out too.
#pgd_label_path = '/content/drive/MyDrive/DACN_XAI_Adv_defense/Dataset/insdn/adv/pgd/adv_pgd_target.csv'

In [9]:
# Load the PGD adversarial samples from pgd_path into a DataFrame.
pgd_df = pd.read_csv(pgd_path)

In [10]:
# Relabel the adversarial-sample columns with the names in feature_list, then print the schema.
# NOTE(review): this replaces whatever header read_csv inferred and is purely positional —
# it assumes the CSV's columns are stored in the same order as feature_list; confirm
# against how the adversarial file was written.
pgd_df.columns = feature_list
pgd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77203 entries, 0 to 77202
Data columns (total 76 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Flow Duration      77203 non-null  float64
 1   Tot Fwd Pkts       77203 non-null  float64
 2   Tot Bwd Pkts       77203 non-null  float64
 3   TotLen Fwd Pkts    77203 non-null  float64
 4   TotLen Bwd Pkts    77203 non-null  float64
 5   Fwd Pkt Len Max    77203 non-null  float64
 6   Fwd Pkt Len Min    77203 non-null  float64
 7   Fwd Pkt Len Mean   77203 non-null  float64
 8   Fwd Pkt Len Std    77203 non-null  float64
 9   Bwd Pkt Len Max    77203 non-null  float64
 10  Bwd Pkt Len Min    77203 non-null  float64
 11  Bwd Pkt Len Mean   77203 non-null  float64
 12  Bwd Pkt Len Std    77203 non-null  float64
 13  Flow Byts/s        77203 non-null  float64
 14  Flow Pkts/s        77203 non-null  float64
 15  Flow IAT Mean      77203 non-null  float64
 16  Flow IAT Std       772

In [None]:
# Disabled: loading of the PGD label file. pgd_label_path is itself commented out,
# so these lines cannot be re-enabled on their own.
#pgd_label = pd.read_csv(pgd_label_path)
#pgd_label.info()

# Load DL-based IDS model

In [11]:
# Load the saved Keras model (HDF5 file) that the SHAP explainer will wrap;
# compile=True asks Keras to compile the model after loading.
model_mlp = tf.keras.models.load_model('/content/drive/MyDrive/Dataset/insdn/insdn_mlp_multi.h5', compile=True)

# SHAP

In [12]:
# Build a SHAP KernelExplainer around the model, passing 100 rows taken from the
# training features via shap.sample as the background dataset.
explainer = shap.KernelExplainer(model_mlp, shap.sample(df, 100))

In [13]:
# Keep only the first 1000 adversarial samples; these are the rows that get explained.
pgd_1000 = pgd_df.head(1000)

In [14]:
# Compute SHAP values for the 1000 selected adversarial samples
# (the explainer reports progress over the 1000 rows while it runs).
shap_values = explainer.shap_values(pgd_1000)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [15]:
# Show the first element of shap_values: a 2-D array with one row per explained sample.
# NOTE(review): presumably index 0 selects the model's first output class — confirm.
print(shap_values[0])

[[ 0.          0.02175684  0.02951276 ... -0.00926304 -0.00493281
  -0.00932875]
 [ 0.          0.00736126  0.05722623 ...  0.         -0.01743178
   0.01650953]
 [-0.09720036 -0.00953832  0.02235504 ...  0.00858994 -0.0885182
   0.05697584]
 ...
 [ 0.          0.02274457  0.02527373 ...  0.         -0.00919211
   0.        ]
 [ 0.00854275  0.01638585  0.01304797 ...  0.0023495   0.00701165
  -0.00734758]
 [ 0.          0.01365036  0.03299578 ... -0.0076538  -0.02475267
   0.00542074]]


In [16]:
# Sanity check: confirm the container type of shap_values[0] before wrapping it in a DataFrame.
type(shap_values[0])

numpy.ndarray

In [17]:
# Wrap the SHAP values at index 0 in a DataFrame: one row per explained sample,
# columns labelled by integer position until they are renamed in a later cell.
data_df = pd.DataFrame(shap_values[0])

In [18]:
# Preview the first five rows of the SHAP-value frame.
data_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66,67,68,69,70,71,72,73,74,75
0,0.0,0.021757,0.029513,0.006152,0.0,0.034843,0.016012,0.029036,0.064038,0.0,...,0.0,0.0,0.0,0.01231,0.0,0.007549,-0.01393,-0.009263,-0.004933,-0.009329
1,0.0,0.007361,0.057226,0.0,0.002343,0.13633,0.0,0.100595,0.130059,0.05387,...,0.032048,0.0,0.0,0.017105,0.0,-0.022295,0.0,0.0,-0.017432,0.01651
2,-0.0972,-0.009538,0.022355,0.0,0.0,0.038967,0.0,0.0,0.020022,0.0,...,0.00996,-0.012335,-0.008566,0.020155,0.0,-0.015272,-0.018193,0.00859,-0.088518,0.056976
3,0.0,0.009073,0.027457,0.005884,0.0,0.03881,0.01229,0.030603,0.042299,0.008445,...,0.0,-0.010909,0.0,0.0,0.007912,0.0,-0.010427,0.0,0.007985,-0.009417
4,-0.055427,0.0,0.014616,0.0,-0.00501,0.04107,-0.005084,0.0,0.036859,0.0,...,0.014206,0.0,0.0,-0.008514,0.0,-0.008254,-0.032687,0.0,-0.066015,0.021556


In [None]:
# Persist the SHAP values to Drive without the row index.
# NOTE(review): this cell sits before the column rename, so when run in order the CSV header
# holds integer positions (0-75) rather than feature names; it also writes under a different
# Drive folder (DACN_XAI_Adv_defense) than the one the inputs were read from — confirm both.
data_df.to_csv('/content/drive/MyDrive/DACN_XAI_Adv_defense/InSDN/MLP_pgd/1000_samples_[0].csv', index=False)

In [19]:
# Label the SHAP-value columns with the feature names (positional assignment from
# feature_list) and preview the result; the per-sample ranking below relies on these labels.
data_df.columns = feature_list
data_df.head()

Unnamed: 0,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,...,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,0.0,0.021757,0.029513,0.006152,0.0,0.034843,0.016012,0.029036,0.064038,0.0,...,0.0,0.0,0.0,0.01231,0.0,0.007549,-0.01393,-0.009263,-0.004933,-0.009329
1,0.0,0.007361,0.057226,0.0,0.002343,0.13633,0.0,0.100595,0.130059,0.05387,...,0.032048,0.0,0.0,0.017105,0.0,-0.022295,0.0,0.0,-0.017432,0.01651
2,-0.0972,-0.009538,0.022355,0.0,0.0,0.038967,0.0,0.0,0.020022,0.0,...,0.00996,-0.012335,-0.008566,0.020155,0.0,-0.015272,-0.018193,0.00859,-0.088518,0.056976
3,0.0,0.009073,0.027457,0.005884,0.0,0.03881,0.01229,0.030603,0.042299,0.008445,...,0.0,-0.010909,0.0,0.0,0.007912,0.0,-0.010427,0.0,0.007985,-0.009417
4,-0.055427,0.0,0.014616,0.0,-0.00501,0.04107,-0.005084,0.0,0.036859,0.0,...,0.014206,0.0,0.0,-0.008514,0.0,-0.008254,-0.032687,0.0,-0.066015,0.021556


In [20]:
# Map every explained adversarial sample to the names of its 10 features with the largest
# SHAP values (largest first), keyed as 'Adv sample <row position>'.
# The number of iterations follows data_df's actual row count instead of a hard-coded 1000,
# so the cell keeps working when a different number of samples is explained.
# NOTE(review): nlargest ranks by signed value, so strongly negative contributions are never
# selected — confirm this is intended rather than ranking by absolute value.
data_dict = {
  'Adv sample ' + str(idx): data_df.iloc[idx].nlargest(10).index.tolist()
  for idx in range(len(data_df))
}

In [21]:
# Preview the first five entries only; printing the whole dictionary emits one
# ten-feature list per explained sample and floods the cell output.
print(dict(list(data_dict.items())[:5]))

{'Adv sample 0': ['Fwd Pkt Len Std', 'Fwd Pkt Len Max', 'Tot Bwd Pkts', 'Fwd Pkt Len Mean', 'Bwd Pkts/s', 'Pkt Size Avg', 'Fwd Header Len', 'Fwd Seg Size Avg', 'Tot Fwd Pkts', 'ACK Flag Cnt'], 'Adv sample 1': ['Fwd Pkt Len Max', 'Fwd Pkt Len Std', 'Fwd Pkt Len Mean', 'Fwd Seg Size Avg', 'Pkt Len Std', 'Pkt Len Mean', 'Pkt Size Avg', 'Tot Bwd Pkts', 'Bwd Pkt Len Max', 'Bwd Pkts/s'], 'Adv sample 2': ['Bwd Header Len', 'SYN Flag Cnt', 'Idle Min', 'Bwd Pkts/s', 'Fwd Pkt Len Max', 'Subflow Bwd Pkts', 'Fwd IAT Max', 'Tot Bwd Pkts', 'Active Std', 'Fwd Pkt Len Std'], 'Adv sample 3': ['Bwd Header Len', 'SYN Flag Cnt', 'Fwd Pkt Len Std', 'Fwd Seg Size Avg', 'Fwd Pkt Len Max', 'Fwd Pkt Len Mean', 'Tot Bwd Pkts', 'Subflow Bwd Pkts', 'FIN Flag Cnt', 'Fwd IAT Std'], 'Adv sample 4': ['Bwd Header Len', 'Fwd Pkt Len Max', 'SYN Flag Cnt', 'Fwd Pkt Len Std', 'Init Fwd Win Byts', 'Fwd Header Len', 'Subflow Bwd Pkts', 'Idle Min', 'Flow Pkts/s', 'Flow Byts/s'], 'Adv sample 5': ['Bwd Header Len', 'SYN Flag C

In [22]:
# Sanity check: number of rows (explained samples) in shap_values[0].
len(shap_values[0])

1000

In [23]:
# Sanity check: data_dict should hold one entry per explained sample.
len(data_dict)

1000

# Extract local features for each adversarial sample (77203 in total)

In [None]:
# Write one CSV per explained adversarial sample, listing its per-feature SHAP values
# sorted from largest to smallest.
# The loop walks the rows of shap_values[0] (the same array data_df is built from), so it
# depends only on names this notebook defines and never indexes past the samples that were
# actually explained. To export all 77203 adversarial samples, compute shap_values on the
# full pgd_df first.
for idx, sample_shap in enumerate(shap_values[0]):
  # Create a DataFrame with the SHAP values and feature names
  df_shap_values = pd.DataFrame({'SHAP Values': sample_shap}, index=feature_list)
  df_shap_values = df_shap_values.sort_values(by=['SHAP Values'], ascending=False)
  df_shap_values.to_csv('/content/drive/MyDrive/DACN_XAI_Adv_defense/InSDN/pgd_Result/SHAP_ADVSamples_' + str(idx) + '.csv')
  print('Extracted local importance feature of the ' + str(idx) + 'adv sample.')

# Detection phase

In [24]:
import os
import pandas as pd

In [26]:
# Load the ranked feature/frequency table from which the whitelists are cut,
# and display up to its first 24 rows.
whitelist_df = pd.read_csv('/content/drive/MyDrive/InSDN/MLP_top24_InSDN.csv')
whitelist_df.head(24)

Unnamed: 0,Feature,Frequency
0,FIN Flag Cnt,1223
1,SYN Flag Cnt,1196
2,Init Fwd Win Byts,986
3,Flow Pkts/s,884
4,ACK Flag Cnt,808
5,Pkt Size Avg,767
6,Bwd Pkts/s,752
7,Down/Up Ratio,732
8,Pkt Len Mean,667
9,Pkt Len Min,588


## Detection rate (top 24) = 928/1000 (per the recorded cell output below; this heading previously stated 969/1000)

In [27]:
# Whitelist: the first 24 entries of the table's first column (the feature names).
whitelist = whitelist_df.iloc[:24, 0].tolist()
print(whitelist)

['FIN Flag Cnt', 'SYN Flag Cnt', 'Init Fwd Win Byts', 'Flow Pkts/s', 'ACK Flag Cnt', 'Pkt Size Avg', 'Bwd Pkts/s', 'Down/Up Ratio', 'Pkt Len Mean', 'Pkt Len Min', 'Bwd Pkt Len Min', 'Pkt Len Max', 'Init Bwd Win Byts', 'Fwd Pkt Len Max', 'Fwd IAT Tot', 'Flow IAT Max', 'PSH Flag Cnt', 'Fwd Pkt Len Std', 'Fwd Pkt Len Mean', 'Pkt Len Std', 'Bwd IAT Tot', 'Idle Max', 'Fwd Seg Size Avg', 'Fwd IAT Std']


In [28]:
# Flag every adversarial sample whose top-feature list contains at least one feature
# that is not in the whitelist; flagged keys keep data_dict's insertion order.
alert = [
    sample_name
    for sample_name, top_features in data_dict.items()
    if not set(top_features).issubset(whitelist)
]

# Report how many samples were flagged
print(f"Alert list length:{len(alert)}")

Alert list length:928


## Detection rate (top 38) = 868/1000 (per the recorded cell output below; this heading previously stated 783/1000)

In [29]:
# Wider whitelist: the first 38 entries of the table's first column (the feature names).
whitelist2 = whitelist_df.iloc[:38, 0].tolist()
print(whitelist2)

['FIN Flag Cnt', 'SYN Flag Cnt', 'Init Fwd Win Byts', 'Flow Pkts/s', 'ACK Flag Cnt', 'Pkt Size Avg', 'Bwd Pkts/s', 'Down/Up Ratio', 'Pkt Len Mean', 'Pkt Len Min', 'Bwd Pkt Len Min', 'Pkt Len Max', 'Init Bwd Win Byts', 'Fwd Pkt Len Max', 'Fwd IAT Tot', 'Flow IAT Max', 'PSH Flag Cnt', 'Fwd Pkt Len Std', 'Fwd Pkt Len Mean', 'Pkt Len Std', 'Bwd IAT Tot', 'Idle Max', 'Fwd Seg Size Avg', 'Fwd IAT Std', 'Bwd PSH Flags', 'Flow Duration', 'Bwd IAT Std', 'Fwd Pkt Len Min', 'Flow IAT Std', 'Bwd IAT Max', 'Idle Mean', 'Fwd IAT Max', 'URG Flag Cnt', 'Idle Min', 'Flow IAT Mean', 'Fwd IAT Mean', 'Flow Byts/s', 'Bwd Pkt Len Mean']


In [30]:
# Same check against the wider whitelist: flag every adversarial sample whose top-feature
# list contains at least one feature outside whitelist2 (data_dict order is preserved).
alert2 = [
    sample_name
    for sample_name, top_features in data_dict.items()
    if not set(top_features).issubset(whitelist2)
]

# Report how many samples were flagged
print(f"Alert list length:{len(alert2)}")

Alert list length:868
