In [1]:
from sklearn.metrics import accuracy_score # for calculating accuracy of model
from sklearn.model_selection import train_test_split # for splitting the dataset for training and testing
from sklearn.metrics import classification_report # for generating a classification report of model
import pickle # saving and loading trained model
from os import path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC

In [3]:
bin_data = pd.read_csv(r"C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\datasets\bin_data.csv")
bin_data.drop(bin_data.columns[0],axis=1,inplace=True)
multi_data = pd.read_csv(r"C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\datasets\multi_data.csv")
multi_data.drop(multi_data.columns[0],axis=1,inplace=True)
le1_classes_ = np.load(r"C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\labels\le1_classes.npy",allow_pickle=True)
le2_classes_ = np.load(r"C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\labels\le2_classes.npy",allow_pickle=True)

In [5]:
X = bin_data.iloc[:,0:93].to_numpy() # dataset excluding target attribute (encoded, one-hot-encoded,original)
Y = bin_data['intrusion']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.25, random_state=42) 

In [9]:
lsvm = SVC(kernel='linear',gamma='auto') 
lsvm.fit(X_train,y_train) 

In [11]:
pkl_filename = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\lsvm_binary.pkl'
if (not path.isfile(pkl_filename)):
  # saving the trained model to disk
  with open(pkl_filename, 'wb') as file:
    pickle.dump(lsvm, file)
  print("Saved model to disk")
  # loading the trained model from disk
  with open(pkl_filename, 'rb') as file:
    lsvm = pickle.load(file)
  print("Loaded model from disk") 

In [13]:
y_pred = lsvm.predict(X_test) # predicting target attribute on testing dataset
ac = accuracy_score(y_test, y_pred)*100 # calculating accuracy of predicted data
print("LSVM-Classifier Binary Set-Accuracy is ", ac)


LSVM-Classifier Binary Set-Accuracy is  96.69778370483266


In [15]:
print(classification_report(y_test, y_pred,target_names=le1_classes_))

              precision    recall  f1-score   support

    abnormal       0.97      0.96      0.96     14720
      normal       0.96      0.97      0.97     16774

    accuracy                           0.97     31494
   macro avg       0.97      0.97      0.97     31494
weighted avg       0.97      0.97      0.97     31494



quadratic svm

In [18]:
qsvm=SVC(kernel='poly',gamma='auto') # using kernal as polynomial for quadratic svm
qsvm.fit(X_train,y_train)

In [20]:
pkl_filename = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\qsvm_binary.pkl'
if (not path.isfile(pkl_filename)):
  # saving the trained model to disk 
  with open(pkl_filename, 'wb') as file:
    pickle.dump(qsvm, file)
  print("Saved model to disk")
  # loading the trained model from disk
  with open(pkl_filename, 'rb') as file:
    qsvm = pickle.load(file)
  print("Loaded model from disk")

In [22]:
y_pred=qsvm.predict(X_test) # predicting target attribute on testing dataset
ac=accuracy_score(y_test, y_pred)*100 # calculating accuracy of predicted data
print("QSVM-Classifier Binary Set-Accuracy is ", ac)

QSVM-Classifier Binary Set-Accuracy is  95.71029402425859


In [24]:
print(classification_report(y_test, y_pred,target_names=le1_classes_))

              precision    recall  f1-score   support

    abnormal       0.99      0.92      0.95     14720
      normal       0.93      0.99      0.96     16774

    accuracy                           0.96     31494
   macro avg       0.96      0.95      0.96     31494
weighted avg       0.96      0.96      0.96     31494



knn

In [26]:
from sklearn.neighbors import KNeighborsClassifier

In [28]:
knn=KNeighborsClassifier(n_neighbors=5) # creating model for 5 neighbors
knn.fit(X_train,y_train)

In [30]:
pkl_filename = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\knn_binary.pkl'
if (not path.isfile(pkl_filename)):
  # saving the trained model to disk
  with open(pkl_filename, 'wb') as file:
    pickle.dump(knn, file)
  print("Saved model to disk")
  # loading the trained model from disk
  with open(pkl_filename, 'rb') as file:
    knn = pickle.load(file)
  print("Loaded model from disk")

In [32]:
y_pred=knn.predict(X_test) # predicting target attribute on testing dataset
ac=accuracy_score(y_test, y_pred)*100 # calculating accuracy of predicted data
print("KNN-Classifier Binary Set-Accuracy is ", ac)

KNN-Classifier Binary Set-Accuracy is  98.55210516288817


In [34]:
print(classification_report(y_test, y_pred,target_names=le1_classes_))

              precision    recall  f1-score   support

    abnormal       0.99      0.98      0.98     14720
      normal       0.99      0.99      0.99     16774

    accuracy                           0.99     31494
   macro avg       0.99      0.99      0.99     31494
weighted avg       0.99      0.99      0.99     31494



IDA

In [36]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [38]:
lda = LinearDiscriminantAnalysis() 
lda.fit(X_train, y_train)

In [40]:
pkl_filename = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\lda_binary.pkl'
if (not path.isfile(pkl_filename)):
  # saving trained model to disk
  with open(pkl_filename, 'wb') as file:
    pickle.dump(lda, file)
  print("Saved model to disk")
  # loading trained model from disk
  with open(pkl_filename, 'rb') as file:
    lda = pickle.load(file)
  print("Loaded model from disk")

In [42]:
y_pred = lda.predict(X_test) # predicting target attribute on testing dataset
ac=accuracy_score(y_test, y_pred)*100 # calculating accuracy of predicted data
print("LDA-Classifier Set-Accuracy is ", ac)

LDA-Classifier Set-Accuracy is  96.70730932876104


In [44]:
print(classification_report(y_test, y_pred,target_names=le1_classes_))

              precision    recall  f1-score   support

    abnormal       0.97      0.96      0.96     14720
      normal       0.96      0.98      0.97     16774

    accuracy                           0.97     31494
   macro avg       0.97      0.97      0.97     31494
weighted avg       0.97      0.97      0.97     31494



MLP

In [47]:
from keras.layers import Dense # importing dense layer
from keras.models import Sequential #importing Sequential layer
from keras.models import model_from_json # saving and loading trained model

In [None]:
X = bin_data.iloc[:,0:93].values # dataset excluding target attribute (encoded, one-hot-encoded,original)
Y = bin_data[['intrusion']].values # target attribute

In [None]:
# splitting the dataset 75% for training and 25% testing
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.25, random_state=42)

In [None]:
mlp = Sequential() # creating model

# adding input layer and first layer with 50 neurons
mlp.add(Dense(units=50, input_dim=X_train.shape[1], activation='relu'))
# output layer with sigmoid activation
mlp.add(Dense(units=1,activation='sigmoid'))

In [None]:
mlp.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
mlp.summary()

In [None]:
history = mlp.fit(X_train, y_train, epochs=100, batch_size=5000,validation_split=0.2)

In [None]:
filepath = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\mlp_binary.json'
weightspath = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\weights\mlp_binary.h5'
if (not path.isfile(filepath)):
  # serialize model to JSON
  mlp_json = mlp.to_json()
  with open(filepath, "w") as json_file:
    json_file.write(mlp_json)

  # serialize weights to HDF5
  mlp.save_weights(weightspath)
  print("Saved model to disk")

  # load json and create model
  json_file = open(filepath, 'r')
  loaded_model_json = json_file.read()
  json_file.close()
  mlp = model_from_json(loaded_model_json)

  # load weights into new model
  mlp.load_weights(weightspath)
  print("Loaded model from disk")

In [None]:
mlp.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
test_results = mlp.evaluate(X_test, y_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}')

In [None]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title("Plot of accuracy vs epoch for train and test dataset")
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.savefig(r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\plots\mlp_binary_accuracy.png')
plt.show()

In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Plot of loss vs epoch for train and test dataset')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.savefig(r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\plots\mlp_binary_loss.png')
plt.show()

lstm

In [None]:
from keras.layers import LSTM

In [None]:
X = bin_data.iloc[:,0:93] # dataset excluding target attribute (encoded, one-hot-encoded,original)
Y = bin_data[['intrusion']].values 

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.25, random_state=42)

In [None]:
X_train = X_train.to_numpy()

In [None]:
x_train = np.reshape(X_train, (X_train.shape[0],X_train.shape[1],1))

In [None]:
lst = Sequential() # initializing model

# input layer and LSTM layer with 50 neurons
lst.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],1)))
# outpute layer with sigmoid activation
lst.add(Dense(1, activation='sigmoid'))

In [None]:
lst.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
lst.summary()

In [None]:
history = lst.fit(x_train, y_train, epochs=100, batch_size=5000,validation_split=0.2)

In [None]:
filepath = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\lst_binary.json'
weightspath = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\weights\lst_binary.h5'
if (not path.isfile(filepath)): 
  # serialize model to JSON
  autoencoder_json = autoencoder.to_json()
  with open(filepath, "w") as json_file:
    json_file.write(autoencoder_json)

  # serialize weights to HDF5
  autoencoder.save_weights(weightspath)
  print("Saved model to disk")

  # load json and create model
  json_file = open(filepath, 'r')
  loaded_model_json = json_file.read()
  json_file.close()
  autoencoder = model_from_json(loaded_model_json)

  # load weights into new model
  autoencoder.load_weights(weightspath)
  print("Loaded model from disk")

In [None]:
lst.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
test_results = lst.evaluate(X_test, X_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}')

In [None]:
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title("Plot of loss vs epoch for train and test dataset")
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.savefig(r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\plots\lstm_binary_loss.png')
plt.show()

In [None]:
plt.plot(history['accuracy'])
plt.plot(history['val_accuracy'])
plt.title("Plot of accuracy vs epoch for train and test dataset")
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.savefig(r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\plots\lstm_binary_accuracy.png')
plt.show()

auto encoder

In [None]:
from keras.layers import Input
from keras.models import Model

In [None]:
X_train, X_test = train_test_split(bin_data, test_size=0.25, random_state=42)

In [None]:
X_train = X_train.drop(['intrusion','abnormal','normal','label'],axis=1) 

y_test = X_test['intrusion'] # target attribute

# dataset excluding target attribute (encoded, one-hot-encoded,original)
X_test = X_test.drop(['intrusion','abnormal','normal','label'],axis=1)

In [None]:
X_train = X_train.values
X_test = X_test.values
y_test = y_test.values

In [None]:
input_dim = X_train.shape[1]
encoding_dim = 50

#input layer
input_layer = Input(shape=(input_dim, ))
#encoding layer with 50 neurons
encoder = Dense(encoding_dim, activation="relu")(input_layer)           
#decoding and output layer
output_layer = Dense(input_dim, activation='softmax')(encoder)

In [None]:
autoencoder = Model(inputs=input_layer, outputs=output_layer)

# defining loss function, optimizer, metrics and then compiling model
autoencoder.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])

In [None]:
autoencoder.summary()

In [None]:
history = autoencoder.fit(X_train, X_train, epochs=100,batch_size=500,validation_data=(X_test, X_test)).history

In [None]:
filepath = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\models\ae_binary.json'
weightspath = r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\weights\ae_binary.h5'
if (not path.isfile(filepath)): 
  # serialize model to JSON
  autoencoder_json = autoencoder.to_json()
  with open(filepath, "w") as json_file:
    json_file.write(autoencoder_json)

  # serialize weights to HDF5
  autoencoder.save_weights(weightspath)
  print("Saved model to disk")

  # load json and create model
  json_file = open(filepath, 'r')
  loaded_model_json = json_file.read()
  json_file.close()
  autoencoder = model_from_json(loaded_model_json)

  # load weights into new model
  autoencoder.load_weights(weightspath)
  print("Loaded model from disk")

In [None]:
autoencoder.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])

In [None]:
test_results = autoencoder.evaluate(X_test, X_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]}%')

In [None]:
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title("Plot of loss vs epoch for train and test dataset")
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.savefig(r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\plots\ae_binary_loss.png')
plt.show()

In [None]:
plt.plot(history['accuracy'])
plt.plot(history['val_accuracy'])
plt.title("Plot of accuracy vs epoch for train and test dataset")
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.savefig(r'C:\Users\bonik\Downloads\Network-Intrusion-Detection-Using-Machine-Learning-master\Network-Intrusion-Detection-Using-Machine-Learning-master\plots\ae_binary_accuracy.png')
plt.show()

In [None]:
predictions = autoencoder.predict(X_test)

In [None]:
mse = np.mean(np.power(X_test - predictions, 2), axis=1)
error_df = pd.DataFrame({'reconstruction_error': mse,
                        'true_class': y_test})

In [None]:
error_df.describe()