In [None]:
# Download the IDS 2017 dataset
!wget https://iscxdownloads.cs.unb.ca/iscxdownloads/CIC-IDS-2017/MachineLearningCSV.zip
# Unzip the dataset
!unzip MachineLearningCSV.zip

In [None]:
import pandas as pd
# Merge the eight per-capture CSV files into a single CSV on disk.
filenames = [
    'MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
    'MachineLearningCVE/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
    'MachineLearningCVE/Friday-WorkingHours-Morning.pcap_ISCX.csv',
    'MachineLearningCVE/Monday-WorkingHours.pcap_ISCX.csv',
    'MachineLearningCVE/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
    'MachineLearningCVE/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
    'MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv',
    'MachineLearningCVE/Wednesday-workingHours.pcap_ISCX.csv',
]

# Read every file first and concatenate once: calling pd.concat inside the
# loop re-copies the ever-growing frame on each iteration.
frames = [pd.read_csv(name) for name in filenames]
df_aug = pd.concat(frames, axis=0)
# The per-file frames are no longer needed once they are merged.
del frames

print(df_aug.shape)
# index=False: the row index (which repeats across the source files) is not
# written to the merged CSV.
df_aug.to_csv('IDS-2017-full.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
from numpy import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from imblearn.under_sampling import RandomUnderSampler

# Load the merged CSV, drop unusable rows, encode the labels and split.

# Seed for the stochastic steps. The value equals the seed the train/test
# split was previously hard-coded with, so the resulting split is unchanged;
# the constant is now actually used instead of being declared and ignored.
RANDOM_SEED = 42

print('Reading data ...')
df = pd.read_csv('IDS-2017-full.csv',)
print(df.columns)
print('Reading done')
print(df.shape)

# Treat +/-inf as missing, then drop every row that has a missing value.
df = df.replace([np.inf, -np.inf], np.nan)
df.dropna(inplace=True)
print(df.shape)

# Encode the string labels as integer class codes.
# Note: the label column name starts with a space.
le = LabelEncoder()
df[' Label'] = le.fit_transform(df[' Label'])
Y = df[' Label']

# All remaining columns are features. No NaN/inf can remain at this point
# (such rows were dropped above), so no further filling is required.
X = df.drop([' Label'], axis=1).astype('float64')

# Feature names, taken from X itself so they stay correct even if the label
# column is not the last column of the CSV.
cols = X.columns

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=RANDOM_SEED
)

# Release the raw frame; only X / Y and the split parts are used from here on.
del df

In [None]:
from numpy import *
# Rebuild the test set as  (all rows) minus (training rows).
#
# Each row (features + label) is viewed as one structured element so that
# np.setdiff1d can compare whole rows. setdiff1d returns the sorted, unique
# elements of its first argument that are absent from the second, so the
# resulting test set contains no duplicate rows and no row that also occurs
# in the training set.

# Convert the labels to plain arrays before adding the column axis:
# multi-dimensional indexing such as Series[:, None] is not supported by
# current pandas versions.
appended = np.append(X, np.asarray(Y)[:, np.newaxis], axis=1)
appended_train = np.append(X_train, np.asarray(Y_train)[:, np.newaxis], axis=1)
print(appended.shape)
print(appended_train.shape)

# One anonymous field per column -> each row becomes a single record.
appended_rows = appended.view([('', appended.dtype)] * appended.shape[1])
appended_train_rows = appended_train.view([('', appended_train.dtype)] * appended_train.shape[1])

# Back from records to a plain 2-D array with the original column count.
appended_test = (
    np.setdiff1d(appended_rows, appended_train_rows)
    .view(appended.dtype)
    .reshape(-1, appended.shape[1])
)

# Last column is the label, everything before it are the features.
# Both are plain NumPy arrays with the dtype of `appended`.
X_test = appended_test[:, :-1]
Y_test = appended_test[:, -1]


In [None]:
# Min-max scale the features. The scaler is fitted on the training data only
# and the same fitted scaler is then applied to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)

# Wrap the scaled arrays in DataFrames again, restoring the feature names.
X_train = pd.DataFrame(scaler.transform(X_train), columns=cols)
X_test = pd.DataFrame(scaler.transform(X_test), columns=cols)

In [None]:
from collections import Counter

# Collapse the class codes to two values: code 0 stays 0, every positive code
# becomes 1 (presumably 0 is the benign class -- confirm against le.classes_).
Y_train_generic, Y_test_generic = (
    np.where(labels > 0, 1, 0) for labels in (Y_train, Y_test)
)

# Training rows whose collapsed label is 1; used as autoencoder input.
X_temp = X_train[Y_train_generic == 1]


In [None]:
import keras
import tensorflow as tf
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import *


# Generic autoencoder: a 52 -> 26 -> 52 dense stack between an input and an
# output layer that both have one unit per feature. It is trained to
# reproduce its own input (X_temp is passed as both x and y).
n_features = X_train.shape[1]

generic_in = Input(shape=(n_features,))
e1 = Dense(52, activation='sigmoid')(generic_in)
h = Dense(26, activation='relu')(e1)
d1 = Dense(52, activation='relu')(h)
generic_out = Dense(n_features, activation='sigmoid')(d1)

generic_model = Model(inputs=generic_in, outputs=generic_out)
generic_model.compile(optimizer='adam', loss='mse', metrics=['mse'])
generic_model.summary()

history = generic_model.fit(
    x=X_temp,
    y=X_temp,
    batch_size=64,
    epochs=30,
    verbose=1,
)
generic_model.save('generic.h5')

In [None]:
# Per-group label tables: each maps the 15 encoded class codes (0..14) to a
# group-local code, where 0 means "not part of this group".
dos_map = {0: 0, 1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 0, 8: 6, 9: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0}
brute_map = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1, 8: 0, 9: 0, 10: 0, 11: 2, 12: 0, 13: 0, 14: 0}
web_map = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 1, 13: 2, 14: 3}


def _remap_labels(labels, mapping):
    """Translate every class code in `labels` through `mapping`.

    Vectorised replacement for a per-element dict lookup.

    Args:
        labels: 1-D array-like of class codes (integer-valued; a float array
            holding whole numbers is accepted and cast to int).
        mapping: dict from class code to group-local code.

    Returns:
        Integer NumPy array with the same length as `labels`.

    Raises:
        KeyError: if `labels` contains a code that is not a key of `mapping`
            (same failure mode as a plain dict lookup).
    """
    codes = np.asarray(labels).astype(int)
    unknown = np.setdiff1d(codes, list(mapping))
    if unknown.size:
        raise KeyError(int(unknown[0]))
    # Lookup table indexed by class code.
    lookup = np.zeros(max(mapping) + 1, dtype=int)
    for source_code, group_code in mapping.items():
        lookup[source_code] = group_code
    return lookup[codes]


Y_train_dos = _remap_labels(Y_train, dos_map)
Y_test_dos = _remap_labels(Y_test, dos_map)
Y_train_brute = _remap_labels(Y_train, brute_map)
Y_test_brute = _remap_labels(Y_test, brute_map)
Y_train_web = _remap_labels(Y_train, web_map)
Y_test_web = _remap_labels(Y_test, web_map)

# Single-class groups: 1 where the class code equals the given value, else 0.
Y_train_port = np.where(np.asarray(Y_train) == 10, 1, 0)
Y_test_port = np.where(np.asarray(Y_test) == 10, 1, 0)
Y_train_bot = np.where(np.asarray(Y_train) == 1, 1, 0)
Y_test_bot = np.where(np.asarray(Y_test) == 1, 1, 0)

In [None]:
# Autoencoder trained only on the training rows whose label maps to a
# non-zero code in dos_map. Same layer layout as the other autoencoders:
# 52 -> 26 -> 52 dense units between per-feature input and output layers.
X_temp = X_train[Y_train_dos > 0]
n_features = X_train.shape[1]

dos_in = Input(shape=(n_features,))
e_dos = Dense(52, activation='sigmoid')(dos_in)
h_dos = Dense(26, activation='relu')(e_dos)
d_dos = Dense(52, activation='relu')(h_dos)
dos_out = Dense(n_features, activation='sigmoid')(d_dos)

dos_model = Model(inputs=dos_in, outputs=dos_out)
dos_model.compile(optimizer='adam', loss='mse', metrics=['mse'])
dos_model.summary()

# Input and target are the same rows: the model learns to reconstruct them.
history_dos = dos_model.fit(
    x=X_temp,
    y=X_temp,
    batch_size=64,
    epochs=30,
    verbose=1,
)
dos_model.save('dos.h5')

In [None]:
# Autoencoder trained only on the training rows whose label maps to a
# non-zero code in brute_map. Layer layout: 52 -> 26 -> 52 dense units
# between per-feature input and output layers.
X_temp = X_train[Y_train_brute > 0]
n_features = X_train.shape[1]

brute_in = Input(shape=(n_features,))
e_brute = Dense(52, activation='sigmoid')(brute_in)
h_brute = Dense(26, activation='relu')(e_brute)
d_brute = Dense(52, activation='relu')(h_brute)
brute_out = Dense(n_features, activation='sigmoid')(d_brute)

brute_model = Model(inputs=brute_in, outputs=brute_out)
brute_model.compile(optimizer='adam', loss='mse', metrics=['mse'])
brute_model.summary()

# Input and target are the same rows: the model learns to reconstruct them.
history_brute = brute_model.fit(
    x=X_temp,
    y=X_temp,
    batch_size=64,
    epochs=30,
    verbose=1,
)
brute_model.save('brute.h5')

In [None]:
# Autoencoder trained only on the training rows whose label maps to a
# non-zero code in web_map. Layer layout: 52 -> 26 -> 52 dense units
# between per-feature input and output layers.
X_temp = X_train[Y_train_web > 0]
n_features = X_train.shape[1]

web_in = Input(shape=(n_features,))
e_web = Dense(52, activation='sigmoid')(web_in)
h_web = Dense(26, activation='relu')(e_web)
d_web = Dense(52, activation='relu')(h_web)
web_out = Dense(n_features, activation='sigmoid')(d_web)

web_model = Model(inputs=web_in, outputs=web_out)
web_model.compile(optimizer='adam', loss='mse', metrics=['mse'])
web_model.summary()

# Input and target are the same rows. This model runs for 60 epochs.
history_web = web_model.fit(
    x=X_temp,
    y=X_temp,
    batch_size=64,
    epochs=60,
    verbose=1,
)
web_model.save('web.h5')

In [None]:
# Autoencoder trained only on the training rows flagged in Y_train_port
# (class code 10). Layer layout: 52 -> 26 -> 52 dense units between
# per-feature input and output layers.
X_temp = X_train[Y_train_port > 0]
n_features = X_train.shape[1]

port_in = Input(shape=(n_features,))
e_port = Dense(52, activation='sigmoid')(port_in)
h_port = Dense(26, activation='relu')(e_port)
d_port = Dense(52, activation='relu')(h_port)
port_out = Dense(n_features, activation='sigmoid')(d_port)

port_model = Model(inputs=port_in, outputs=port_out)
port_model.compile(optimizer='adam', loss='mse', metrics=['mse'])
port_model.summary()

# Input and target are the same rows: the model learns to reconstruct them.
history_port = port_model.fit(
    x=X_temp,
    y=X_temp,
    batch_size=64,
    epochs=30,
    verbose=1,
)
port_model.save('port.h5')

In [None]:
# Autoencoder trained only on the training rows flagged in Y_train_bot
# (class code 1). Layer layout: 52 -> 26 -> 52 dense units between
# per-feature input and output layers.
X_temp = X_train[Y_train_bot > 0]
n_features = X_train.shape[1]

bot_in = Input(shape=(n_features,))
e_bot = Dense(52, activation='sigmoid')(bot_in)
h_bot = Dense(26, activation='relu')(e_bot)
d_bot = Dense(52, activation='relu')(h_bot)
bot_out = Dense(n_features, activation='sigmoid')(d_bot)

bot_model = Model(inputs=bot_in, outputs=bot_out)
bot_model.compile(optimizer='adam', loss='mse', metrics=['mse'])
bot_model.summary()

# Input and target are the same rows. This model runs for 60 epochs.
history_bot = bot_model.fit(
    x=X_temp,
    y=X_temp,
    batch_size=64,
    epochs=60,
    verbose=1,
)
bot_model.save('bot.h5')

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

def _middle_layer_encoder(autoencoder):
    """Return a model from the autoencoder's input to its third-from-last layer.

    For the five-layer autoencoders built in this notebook (input, 52, 26, 52,
    output) the third-from-last layer is the 26-unit dense layer.
    """
    return Model(inputs=autoencoder.input, outputs=autoencoder.layers[-3].output)


# Encode both splits with the generic autoencoder's middle layer.
generic_model_hidden = _middle_layer_encoder(generic_model)
H_train = generic_model_hidden.predict(X_train)
H_test = generic_model_hidden.predict(X_test)

print(H_train.shape)
print(H_test.shape)

# Same truncation for each of the group-specific autoencoders.
dos_model_hidden = _middle_layer_encoder(dos_model)
brute_model_hidden = _middle_layer_encoder(brute_model)
web_model_hidden = _middle_layer_encoder(web_model)
port_model_hidden = _middle_layer_encoder(port_model)
bot_model_hidden = _middle_layer_encoder(bot_model)

In [None]:
# Encode both splits with every group-specific encoder and place the results
# next to the generic encoder's features (column-wise), in this fixed order:
# generic, dos, brute, web, port, bot.
hidden_encoders = [
    dos_model_hidden,
    brute_model_hidden,
    web_model_hidden,
    port_model_hidden,
    bot_model_hidden,
]

# Collect the per-encoder feature blocks and join them once at the end,
# instead of re-copying the growing matrix after every encoder.
train_blocks = [H_train]
test_blocks = [H_test]
train_width = H_train.shape[1]
test_width = H_test.shape[1]

for encoder in hidden_encoders:
    H_train_temp = encoder.predict(X_train)
    H_test_temp = encoder.predict(X_test)
    train_blocks.append(H_train_temp)
    test_blocks.append(H_test_temp)

    # Report the shape the concatenated matrices have after this encoder.
    train_width += H_train_temp.shape[1]
    test_width += H_test_temp.shape[1]
    print((H_train.shape[0], train_width))
    print((H_test.shape[0], test_width))

H_train_concat = np.concatenate(train_blocks, axis=1)
H_test_concat = np.concatenate(test_blocks, axis=1)

# Drop the temporary lists so the individual blocks can be freed.
del train_blocks, test_blocks


In [None]:
# Train a random forest on the concatenated encoder features and evaluate it
# on the test split. random_state makes the forest reproducible across runs
# given the same input features.
clf = RandomForestClassifier(
    n_estimators=5,
    verbose=2,
    oob_score=True,
    n_jobs=-1,
    random_state=RANDOM_SEED,
)
clf.fit(H_train_concat, Y_train)
Y_pred_concat = clf.predict(H_test_concat)

# Y_test may be float-typed when it was rebuilt from a float matrix; cast to
# int so that true and predicted labels share one integer type in the reports.
Y_test_int = np.asarray(Y_test).astype(int)

print(classification_report(Y_test_int, Y_pred_concat))
print(accuracy_score(Y_test_int, Y_pred_concat))
print(confusion_matrix(Y_test_int, Y_pred_concat))