In [15]:
!pip3 install tensorflow keras pandas --upgrade --quiet

In [16]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import pandas as pd
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras.models import Model

In [17]:
import contextlib
import gc

@contextlib.contextmanager
def clear_memory():
    try:
        yield
    finally:
        gc.collect()

In [18]:
!git clone -q https://github.com/Colorado-Mesa-University-Cybersecurity/DeepLearning-IDS.git

fatal: destination path 'DeepLearning-IDS' already exists and is not an empty directory.


In [19]:
!wget "https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv" -q --show-progress
!wget "https://cse-cic-ids2018.s3.ca-central-1.amazonaws.com/Processed%20Traffic%20Data%20for%20ML%20Algorithms/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv" -q --show-progress



In [20]:
with clear_memory():
    data = pd.read_csv('Friday-23-02-2018_TrafficForML_CICFlowMeter.csv')

data.Label.value_counts()

Label
Benign              1048009
Brute Force -Web        362
Brute Force -XSS        151
SQL Injection            53
Name: count, dtype: int64

In [21]:
!python DeepLearning-IDS/data_cleanup.py "Friday-23-02-2018_TrafficForML_CICFlowMeter.csv" "result23022018"

cleaning Friday-23-02-2018_TrafficForML_CICFlowMeter.csv
total rows read = 1048576
all done writing 1042868 rows; dropped 5708 rows


In [22]:
with clear_memory():
    data_23_cleaned = pd.read_csv('result23022018.csv')

data_23_cleaned.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,22,6,1519374000.0,1532698,11,11,1179,1969,648,0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,Benign
1,500,17,1519374000.0,117573855,3,0,1500,0,500,500,...,8,0.0,0.0,0,0,58786927.5,23753240.0,75583006,41990849,Benign
2,500,17,1519374000.0,117573848,3,0,1500,0,500,500,...,8,0.0,0.0,0,0,58786924.0,23753250.0,75583007,41990841,Benign
3,22,6,1519374000.0,1745392,11,11,1179,1969,648,0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,Benign
4,500,17,1519374000.0,89483474,6,0,3000,0,500,500,...,8,4000364.0,0.0,4000364,4000364,21370777.5,15280920.0,41989576,7200485,Benign


In [23]:
data_23_cleaned.Label.value_counts()

Label
Benign              1042301
Brute Force -Web        362
Brute Force -XSS        151
SQL Injection            53
Name: count, dtype: int64

In [24]:
!wget -q -O it_threat_model.zip "https://drive.google.com/uc?export=download&id=1ahr5dYlhuxS56M6helUFI0yIxxIoFk9o"
!unzip -q it_threat_model.zip

replace __MACOSX/._it_threat_model? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [25]:
with clear_memory():
    model = keras.models.load_model('it_threat_model')

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               10240     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 18561 (72.50 KB)
Trainable params: 18561 (72.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
with clear_memory():
    # Use the first layer
    layer_name = 'dense'
    intermediate_layer_model = Model(
        inputs = model.input,
        outputs = model.get_layer(layer_name).output
    )


In [27]:
from tqdm import tqdm
items_to_upload = []

with clear_memory():
    model_res = intermediate_layer_model.predict(K.constant(data_23_cleaned.iloc[:,:-1]))

    for i, res in tqdm(zip(data_23_cleaned.iterrows(), model_res), total = len(model_res)):
        benign_or_attack = i[1]['Label'][:3]
        items_to_upload.append((benign_or_attack + '_' + str(i[0]), res.tolist()))



100%|██████████| 1042867/1042867 [00:47<00:00, 21959.68it/s]


In [34]:
with clear_memory():
    df = pd.DataFrame(items_to_upload, columns=['ID', 'Model_Results'])

df.head()

Unnamed: 0,ID,Model_Results
0,Ben_0,"[0.0, 0.0, 0.0, 125628656.0, 0.0, 0.0, 5421442..."
1,Ben_1,"[0.0, 0.0, 0.0, 356751744.0, 1190461440.0, 0.0..."
2,Ben_2,"[0.0, 0.0, 0.0, 356751680.0, 1190461440.0, 0.0..."
3,Ben_3,"[0.0, 0.0, 0.0, 125515856.0, 0.0, 0.0, 5432884..."
4,Ben_4,"[0.0, 0.0, 0.0, 26214912.0, 698683840.0, 0.0, ..."


In [49]:
import struct

def data_to_binary(data: list[float]):
    format_string = 'f' * len(data)
    return struct.pack(format_string, *data)

with clear_memory():
    df['Model_Results'] = df['Model_Results'].apply(data_to_binary)

In [50]:
df.head()

Unnamed: 0,ID,Model_Results
0,Ben_0,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...
1,Ben_1,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...
2,Ben_2,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...
3,Ben_3,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...
4,Ben_4,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...


In [73]:
%%sql
DROP DATABASE IF EXISTS siem_log_kafka_demo;

CREATE DATABASE IF NOT EXISTS siem_log_kafka_demo;

USE siem_log_kafka_demo;

DROP TABLE IF EXISTS model_results_demo;

CREATE TABLE IF NOT EXISTS model_results (
    id TEXT,
    Model_Results BLOB
);

In [76]:
from sqlalchemy import *

db_connection = create_engine(connection_url)

In [77]:
with clear_memory():
    df.to_sql(
        'model_results',
        con = db_connection,
        if_exists = 'append',
        index = False,
        chunksize = 1000
    )

In [None]:
%%sql
USE siem_log_kafka_demo;

SELECT ID, JSON_ARRAY_UNPACK(Model_Results) AS Model_Results
FROM model_results
LIMIT 1;