In [1]:
# Environment setup: create a dedicated conda environment with a pinned Python version.
!conda create --name botnet-detection python=3.10.9 -y
# NOTE(review): a `!` line runs in its own shell, so this activation presumably does not
# carry over to the running kernel -- confirm the kernel itself is started from this env.
!conda activate botnet-detection

In [None]:
# Install the pinned dependency versions from the conda-forge channel (-y skips the prompt).
%conda install -c conda-forge tensorflow=2.10.0 numpy=1.24.2 pandas=1.5.2 scikit-learn=1.1.2 sklearn-pandas=2.2.0 -y

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from pickle import load

# Only let TensorFlow log records of level ERROR and above through
tf.get_logger().setLevel('ERROR')

# Pandas-Jupyter options, applied in one pass
# NOTE(review): "display.float_format" presumably takes priority over
# "display.precision" when floats are rendered -- confirm which one is intended.
for option_name, option_value in (
    ("display.precision", 4),
    ("display.max_rows", 4),
    ("display.max_columns", 90),
    ("display.float_format", '{:,.2g}'.format),
):
    pd.set_option(option_name, option_value)

# Disable SKLearn warnings by replacing warnings.warn with a no-op
import warnings
def warn(*args, **kwargs):
    """Accept any arguments and do nothing."""
    return None
warnings.warn = warn

In [3]:
# Load scaler
# SECURITY: unpickling can execute arbitrary code -- only load scaler.pkl from a trusted source.
# The context manager guarantees the file handle is closed once the scaler is read.
with open("archive_64/scaler.pkl", 'rb') as scaler_file:
    scaler = load(scaler_file)

# Load model
model = tf.keras.models.load_model("archive_64/dnn_3ep.h5")
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 16)                1040      
                                                                 
 dense_17 (Dense)            (None, 16)                272       
                                                                 
 dense_18 (Dense)            (None, 16)                272       
                                                                 
 dense_19 (Dense)            (None, 1)                 17        
                                                                 
Total params: 1,601
Trainable params: 1,601
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Load flows
# Both CSV files use their first column as the row index.
flow_features = pd.read_csv('archive_64/selected/features.csv', index_col=0)
flows = pd.read_csv('archive_64/selected/flows.csv', index_col=0)
# Alternative flow file, kept for reference:
# flows = pd.read_csv('flows/examples/Virut/Virut_Flows.csv', index_col=0)
flow_features

Unnamed: 0,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Packet Length Min,Packet Length Max,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWR Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Fwd Segment Size Avg,Bwd Segment Size Avg,Fwd Bytes/Bulk Avg,Fwd Packet/Bulk Avg,Fwd Bulk Rate Avg,Bwd Bytes/Bulk Avg,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Fwd Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min
12,10951191,27,25,6532,6330,1.5e+03,0,2.4e+02,4.3e+02,3.2e+03,0,2.5e+02,6.5e+02,2.1e+05,1.1e+06,7.6e+06,30,1.1e+07,4.2e+05,1.6e+06,7.6e+06,1.6e+02,1.1e+07,4.6e+05,1.7e+06,7.8e+06,1.9e+02,0,0,0,0,548,508,0,3.2e+03,2.4e+02,5.4e+02,2.9e+05,1,2,0,38,51,0,0,0,0,2.5e+02,2.4e+02,2.5e+02,0,0,0,4287,13,35743,0,125,0,121,64240,24820,21,20
1860,147862986,6,0,0,0,0,0,0,0,0,0,0,0,3e+07,5.6e+07,1.3e+08,3e+06,1.5e+08,3e+07,5.6e+07,1.3e+08,3e+06,0,0,0,0,0,0,0,0,0,168,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,64240,0,0,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316026,8966390,6,0,0,0,0,0,0,0,0,0,0,0,1.8e+06,2.7e+06,6e+06,9,9e+06,1.8e+06,2.7e+06,6e+06,9,0,0,0,0,0,0,0,0,0,168,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,64240,0,0,28
316029,9012934,6,0,0,0,0,0,0,0,0,0,0,0,1.8e+06,2.7e+06,6e+06,10,9e+06,1.8e+06,2.7e+06,6e+06,10,0,0,0,0,0,0,0,0,0,168,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,64240,0,0,28


In [22]:
def loss_gradient(input, ground_truth):
    """Return the loss gradient w.r.t. ``input`` and the sign of that gradient.

    The global ``model`` (must be initialized earlier) is evaluated on
    ``input`` and a binary cross-entropy loss against ``ground_truth`` is
    differentiated with respect to ``input``.

    Args:
        input: tensor fed to the model; it is explicitly watched by the tape.
        ground_truth: target value(s) passed to the loss as ``y_true``.

    Returns:
        A tuple ``(gradient, gradient_sign)`` of NumPy values, each being the
        first entry (index 0) of the corresponding tensor.
    """
    bce = tf.keras.losses.BinaryCrossentropy()

    with tf.GradientTape() as grad_tape:
        # Inputs are not variables, so they have to be watched explicitly
        grad_tape.watch(input)
        predicted = model(input)
        loss_value = bce(ground_truth, predicted)

    # Differentiate the loss with respect to the input
    d_loss_d_input = grad_tape.gradient(loss_value, input)
    first_gradient = d_loss_d_input.numpy()[0]
    first_sign = tf.sign(d_loss_d_input).numpy()[0]
    return first_gradient, first_sign

In [23]:
def flow_gradients(features, label=1):
    """Compute per-sample loss gradients and their signs for a feature frame.

    Each row of ``features`` is scaled with the global ``scaler`` and passed to
    ``loss_gradient`` one sample at a time, so every row gets its own gradient.

    The result frames are built once from the collected rows instead of
    writing row-by-row into copies of ``features``: the old approach made the
    result dtypes depend on pandas silently upcasting the original (partly
    integer) columns. Both returned frames are now uniformly float64.

    Args:
        features: DataFrame of unscaled flow features; it is not modified.
        label: ground-truth label used for the loss. The default 1 implies
            the flows are malicious.

    Returns:
        ``(df_grad, df_grad_signs)``: two DataFrames with the same index and
        columns as ``features`` holding the gradients and their signs.
    """
    scaled = scaler.transform(features)  # Scale features
    target = [[label]]

    # Calculate loss function gradients for each sample
    gradients, gradient_signs = [], []
    for i in range(len(features)):
        gradient, gradient_sign = loss_gradient(tf.cast([scaled[i]], tf.float32), target)
        gradients.append(gradient)
        gradient_signs.append(gradient_sign)

    def _as_frame(rows):
        # The reshape keeps the column count correct even when there are no rows
        values = np.asarray(rows, dtype=np.float64).reshape(len(rows), features.shape[1])
        return pd.DataFrame(values, index=features.index, columns=features.columns)

    return _as_frame(gradients), _as_frame(gradient_signs)

# Compute gradients and gradient signs for the loaded flow features, then display the signs
flows_grad, flows_grad_signs = flow_gradients(flow_features)
flows_grad_signs

Unnamed: 0,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Packet Length Min,Packet Length Max,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWR Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Fwd Segment Size Avg,Bwd Segment Size Avg,Fwd Bytes/Bulk Avg,Fwd Packet/Bulk Avg,Fwd Bulk Rate Avg,Bwd Bytes/Bulk Avg,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Fwd Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min
12,-1,-1,1,1,1,1,1,1,1,-1,-1,-1,1,1,1,1,1,-1,1,-1,-1,1,1,1,1,1,1,-1,-1,1,-1,-1,1,-1,1,-1,1,-1,1,-1,-1,-1,1,1,-1,-1,-1,1,1,-1,1,1,-1,1,1,-1,1,-1,1,-1,-1,-1,1,1
1860,1,1,1,1,1,-1,-1,1,1,-1,-1,1,-1,1,1,1,1,-1,1,1,-1,1,1,1,1,1,1,-1,-1,1,-1,1,1,-1,-1,-1,1,-1,1,-1,-1,-1,1,1,-1,-1,-1,1,1,-1,1,-1,-1,-1,1,-1,-1,-1,1,1,-1,-1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316026,-1,-1,1,-1,1,1,1,1,1,1,-1,-1,1,-1,-1,-1,-1,1,-1,1,-1,-1,-1,-1,-1,1,-1,1,-1,1,-1,-1,-1,1,1,1,-1,1,-1,1,1,1,-1,1,-1,-1,1,-1,1,-1,-1,1,-1,1,1,1,-1,1,-1,-1,-1,-1,1,-1
316029,-1,-1,1,-1,1,1,1,1,1,1,-1,-1,1,-1,-1,-1,-1,1,-1,1,-1,-1,-1,-1,-1,1,-1,1,-1,1,-1,-1,-1,1,1,1,-1,1,-1,1,1,1,-1,1,-1,-1,1,-1,1,-1,-1,1,-1,1,1,1,-1,1,-1,-1,-1,-1,1,-1


In [26]:
# make flows adversarial (or at least try)
def make_adversarial(feautures, feature_grad_signs, epsilons=None, threshold=0.99):
    """Perturb timing features along the gradient sign and re-score the flows.

    For every epsilon, each selected feature is scaled by
    ``1 + epsilon * sign``; the perturbed frame is passed through the global
    ``scaler`` and ``model``, and the share of predictions below ``threshold``
    is printed as the misclassification rate.

    Args:
        feautures: DataFrame of unscaled flow features (the parameter name is
            misspelled in the original interface and kept for compatibility).
        feature_grad_signs: DataFrame of gradient signs aligned with
            ``feautures``. It is NOT modified; a masked copy is used.
        epsilons: iterable of perturbation strengths. Defaults to the
            original grid ``[0, 0.01, 0.05, 0.1, ..., 0.7]``.
        threshold: predictions strictly below this value count as
            misclassified. Defaults to 0.99.

    Returns:
        dict mapping each epsilon to its perturbed feature frame, extended by
        a "Predicted Probability (epsilon = ...)" column.
    """
    features_to_change = [
        "Flow Duration",
        "Flow IAT Mean", "Flow IAT Std", "Flow IAT Max", "Flow IAT Min",
        "Fwd IAT Total", "Fwd IAT Mean", "Fwd IAT Std", "Fwd IAT Max", "Fwd IAT Min",
        "Bwd IAT Total", "Bwd IAT Mean", "Bwd IAT Std", "Bwd IAT Max", "Bwd IAT Min",
    ]
    if epsilons is None:
        epsilons = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

    # Set all gradients to zero except the ones we want to change.
    # Mask a copy so the caller's sign frame keeps its full content.
    masked_signs = feature_grad_signs.copy()
    masked_signs.loc[:, ~masked_signs.columns.isin(features_to_change)] = 0

    # Note: Epsilon-scaling is applied BEFORE MinMax-scaling
    result = {}
    print("Misclassification rate:")
    for epsilon in epsilons:
        # Scale feature in the direction of the gradient (this creates a new frame)
        perturbed = feautures + epsilon * feautures * masked_signs

        new_predictions = model.predict(scaler.transform(perturbed), verbose=0)

        perturbed["Predicted Probability (epsilon = " + str(epsilon) + ")"] = new_predictions
        print(f'eps = {str(epsilon)}: {np.sum(new_predictions < threshold) / len(new_predictions) * 100:.2f}%')
        result[epsilon] = perturbed

    return result

# Run the epsilon sweep on the loaded flows; `result` maps each epsilon to its perturbed frame
result = make_adversarial(flow_features, flows_grad_signs)

Misclassification rate:
eps = 0: 0.00%
eps = 0.01: 0.05%
eps = 0.05: 0.13%
eps = 0.1: 0.34%
eps = 0.2: 0.44%
eps = 0.3: 2.59%
eps = 0.4: 7.02%
eps = 0.5: 7.08%
eps = 0.6: 7.12%
eps = 0.7: 7.15%
