In [1]:
# For reading, visualizing, and preprocessing data
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import time
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn import model_selection
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score, precision_score, recall_score, cohen_kappa_score, log_loss, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Classifiers
from sklearn.svm import NuSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
# from tensorflow.keras.models import Sequential
#from mlxtend.classifier import StackingCVClassifier 

# Silence all Python warnings for the rest of the notebook (originally added for
# mlxtend's StackingCVClassifier, whose import above is commented out)
import warnings
warnings.simplefilter('ignore')

In [2]:
# Load the preprocessed dataset from the notebook's working directory.
# A bare relative filename resolves the same way on Windows, Linux and macOS;
# the previous ".\\numeric_data.csv" only worked on Windows because the
# backslash is an ordinary filename character on POSIX systems.
data = pd.read_csv("numeric_data.csv")

# Let pandas display up to 500 rows / 500 columns before truncating output.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [3]:
# Working frame: everything from `data` except its first column.
# `drop` returns a new DataFrame, so `data` itself is left untouched.
# NOTE(review): the first column is presumably a saved index — confirm.
first_column = data.columns[0]
numeric_data = data.drop(columns=first_column)

In [4]:
# Feature matrix: the first 93 columns (selected by position) as a NumPy array.
# NOTE(review): assumes 'intrusion_no' is not among those 93 columns — confirm,
# otherwise the target leaks into the features.
X = numeric_data.iloc[:, :93].to_numpy()

# Target vector: the 'intrusion_no' column, kept as a pandas Series.
Y = numeric_data['intrusion_no']

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Report the train/test shapes, features first and then targets.
for train_part, test_part in ((X_train, X_test), (Y_train, Y_test)):
    print(train_part.shape, test_part.shape)

(100778, 93) (25195, 93)
(100778,) (25195,)


In [5]:
def matrix(Y_test,Y_test_pred,name):
    """Draw the confusion matrix of a set of predictions as an annotated grid.

    Parameters
    ----------
    Y_test : array-like
        True labels.
    Y_test_pred : array-like
        Predicted labels, aligned element-wise with ``Y_test``.
    name : object
        Figure title; converted with ``str`` so non-string values
        (e.g. a tuple of estimators) are accepted.

    Returns
    -------
    None
        A new 5x5 inch figure is created; it is neither shown nor closed here.
    """
    # Calculate the confusion matrix
    conf_matrix = confusion_matrix(y_true=Y_test, y_pred=Y_test_pred)

    # Render the matrix as a lightly shaded grid
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
    ax.set_title(str(name))

    # confusion_matrix puts true classes on rows and predictions on columns;
    # label the axes so the figure can be read on its own.
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')

    # Write each count in the centre of its cell (x = column, y = row).
    for (row, col), count in np.ndenumerate(conf_matrix):
        ax.text(x=col, y=row, s=count, va='center', ha='center', size='large')

In [6]:
def plot(Y_test,Y_test_pred,name,start=100,stop=200):
    """Overlay predicted and actual labels for a window of test samples.

    Parameters
    ----------
    Y_test : array-like
        True labels (pandas Series, list or NumPy array).
    Y_test_pred : array-like
        Predicted labels, aligned element-wise with ``Y_test``.
    name : object
        Figure title; converted with ``str``.
    start, stop : int, optional
        Positional bounds of the window to draw (``start`` inclusive,
        ``stop`` exclusive). The defaults reproduce the original fixed
        ``100:200`` window.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # np.asarray gives plain positional slicing for Series and arrays alike,
    # so Y_test no longer has to be a pandas object.
    predicted = np.asarray(Y_test_pred)[start:stop]
    actual = np.asarray(Y_test)[start:stop]

    fig, ax = plt.subplots(figsize=(22, 10))
    ax.plot(predicted, label="Prediction", linewidth=2.5, color='blue')
    ax.plot(actual, label="Actual_values", linewidth=2.5, color='lightcoral')
    ax.set_xlabel('Sample offset (window starts at position %s)' % start)
    ax.set_ylabel('Class label')
    ax.legend(loc="best")
    ax.set_title(str(name))
    plt.show()

In [None]:
# Stacking experiment: for a batch of base-learner combinations, fit a
# StackingClassifier with an MLP meta-learner and report train/test metrics.
SEED = 42  # same value as the random_state used for the train/test split

algo_list = [
    ('SVM', SVC(gamma='scale', probability=True, random_state=SEED)),
    ('KNN', KNeighborsClassifier(5)),
    ('GAUSSIAN NB', GaussianNB()),
    ('DT', DecisionTreeClassifier(criterion="entropy", max_depth=4, random_state=SEED)),
    ('RF', RandomForestClassifier(n_estimators=30, random_state=SEED)),
    ('LR', LogisticRegression(max_iter=1200000)),
]

# Every subset of the base learners, ordered by subset size (empty set first).
list_combo = []
for n in range(len(algo_list) + 1):
    list_combo += itertools.combinations(algo_list, n)

# With 6 learners, indices 0-21 hold the subsets of size 0, 1 and 2
# (1 + 6 + 15 of them), so 22:27 is the first five 3-learner combinations.
res = list_combo[22:27]

for combo in res:
    # Short human-readable title for the figures, e.g. "SVM + KNN + GAUSSIAN NB".
    combo_label = ' + '.join(name for name, _ in combo)
    print('\n')
    print(combo)

    # combinations() yields tuples, but StackingClassifier validates
    # `estimators` as a list, so convert before constructing the model.
    stack_model = StackingClassifier(
        estimators=list(combo),
        final_estimator=MLPClassifier(random_state=SEED),
    )

    # perf_counter is monotonic, which makes it the right clock for durations.
    start_time = time.perf_counter()
    stack_model.fit(X_train, Y_train)
    end_time = time.perf_counter()
    print("- Training time : ",end_time-start_time)

    start_time = time.perf_counter()
    Y_test_predf1 = stack_model.predict(X_test)
    end_time = time.perf_counter()
    print("- Testing time : ",end_time-start_time)
    print('----------------------------------')

    Y_train_predf1 = stack_model.predict(X_train)
    Y_train_probsf1 = stack_model.predict_proba(X_train)
    Y_test_probsf1 = stack_model.predict_proba(X_test)

    # Column order of predict_proba; passed to log_loss so it still works
    # when a split happens to contain no sample of some class.
    class_labels = stack_model.classes_

    # Training set model performance
    stack_model_train_accuracy = accuracy_score(Y_train, Y_train_predf1)
    stack_model_train_mcc = matthews_corrcoef(Y_train, Y_train_predf1)
    stack_model_train_f1 = f1_score(Y_train, Y_train_predf1, average='weighted')
    stack_model_train_precision = precision_score(Y_train, Y_train_predf1, average='weighted')
    stack_model_train_recall = recall_score(Y_train, Y_train_predf1, average='weighted')
    stack_model_train_ckscore = cohen_kappa_score(Y_train, Y_train_predf1)
    stack_model_train_logloss = log_loss(Y_train, Y_train_probsf1, labels=class_labels)

    # Test set model performance
    stack_model_test_accuracy = accuracy_score(Y_test, Y_test_predf1)
    stack_model_test_mcc = matthews_corrcoef(Y_test, Y_test_predf1)
    stack_model_test_f1 = f1_score(Y_test, Y_test_predf1, average='weighted')
    stack_model_test_precision = precision_score(Y_test, Y_test_predf1, average='weighted')
    stack_model_test_recall = recall_score(Y_test, Y_test_predf1, average='weighted')
    stack_model_test_ckscore = cohen_kappa_score(Y_test, Y_test_predf1)
    stack_model_test_logloss = log_loss(Y_test, Y_test_probsf1, labels=class_labels)

    print('Model performance for Training set')
    print('- Train Accuracy : %s' % stack_model_train_accuracy)
    print('- Train MCC : %s' % stack_model_train_mcc)
    print('- Train F1 score : %s' % stack_model_train_f1)
    print('- Train Precision : %s' % stack_model_train_precision)
    print('- Train Recall : %s' % stack_model_train_recall)
    print('- Train Cohens Kappa Score : %s' % stack_model_train_ckscore)
    print('- Train Log Loss : %s' % stack_model_train_logloss)
    print('----------------------------------')
    print('Model performance for Test set')
    print('- Test Accuracy : %s' % stack_model_test_accuracy)
    print('- Test MCC : %s' % stack_model_test_mcc)
    print('- Test F1 score : %s' % stack_model_test_f1)
    print('- Test Precision : %s' % stack_model_test_precision)
    print('- Test Recall : %s' % stack_model_test_recall)
    print('- Test Cohens Kappa Score : %s' % stack_model_test_ckscore)
    print('- Test Log Loss : %s' % stack_model_test_logloss)
    print('----------------------------------')

    # NOTE(review): these error metrics treat the class ids as numbers, so they
    # are only meaningful if 'intrusion_no' is ordinal — confirm before citing.
    test_mse = metrics.mean_squared_error(Y_test, Y_test_predf1)
    print("Mean Absolute Error - " , metrics.mean_absolute_error(Y_test, Y_test_predf1))
    print("Mean Squared Error - " , test_mse)
    print("Root Mean Squared Error - " , np.sqrt(test_mse))
    print('----------------------------------')

    matrix(Y_test, Y_test_predf1, combo_label)
    plot(Y_test, Y_test_predf1, combo_label)



(('SVM', SVC(probability=True)), ('KNN', KNeighborsClassifier()), ('GAUSSIAN NB', GaussianNB()))
