In [1]:
import time
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
from sklearn import metrics
from pipeline import create_pipeline
import os
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import itertools


In [2]:

# Preprocessing
# Load the dataset and separate the binary label from the feature columns.

files = dict(
    CICI='/home/irteam/junghye-dcloud-dir/MLAC/new_data/CICI.csv',
    UNSW='/home/irteam/junghye-dcloud-dir/MLAC/new_data/UNSW.csv',
)

data = pd.read_csv(files['CICI'])

# Keep only the rows in which every value is finite (np.isfinite is False
# for NaN and +/-inf, so such rows are dropped).
finite_rows = np.isfinite(data).all(axis=1)
data = data.loc[finite_rows]

# 'label' is the target of the binary task.
binary_target = data['label']

# Remove the target together with the two other category columns so that
# only feature columns remain in `data`.
data = data.drop(columns=['label', 'attack_category', 'nist_category'])

In [3]:
# Define Models
# Every estimator with a random component gets a fixed seed so that repeated
# runs of the notebook produce the same scores. 34 is the same seed that the
# train/test split uses.
RANDOM_STATE = 34

# (short name, unfitted estimator) pairs; the short name is used for the
# result table row and for the confusion-matrix file name.
models = [
    ('RF', RandomForestClassifier(max_depth=5, n_estimators=5, max_features=3,
                                  random_state=RANDOM_STATE)),
    ('CART', DecisionTreeClassifier(max_depth=5, random_state=RANDOM_STATE)),
    ('NB', GaussianNB()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('QDA', QuadraticDiscriminantAnalysis()),
    ('LR', LogisticRegression(solver='lbfgs', max_iter=200)),
    ('ABoost', AdaBoostClassifier(random_state=RANDOM_STATE)),
    ('KNN', KNeighborsClassifier()),
    ('MLP', MLPClassifier(random_state=RANDOM_STATE)),
]

In [4]:
# Result table: one row per model holding its name, accuracy and then the
# F1 / recall / precision scores, each with micro, macro and weighted averaging.
metric_columns = [
    f'{metric}_{avg}'
    for metric in ('f1', 'rc', 'pc')
    for avg in ('mi', 'ma', 'we')
]
df = pd.DataFrame(columns=['name', 'acc', *metric_columns])

# Output locations for the score table and the confusion-matrix images.
eval_path = '/home/irteam/junghye-dcloud-dir/MLAC/evaluation'
confusion_path = '/home/irteam/junghye-dcloud-dir/MLAC/confusion_matrix/CICI_binary'

# Index of the next row to be written into `df`.
cnt = 0

In [5]:
# Hold out 30% of the rows for testing. The split is shuffled, stratified on
# the binary label and seeded so it is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    binary_target,
    test_size=0.3,
    random_state=34,
    shuffle=True,
    stratify=binary_target,
)

In [6]:
# confusion matrix plot
def plot_confusion_matrix(con_mat,labels,title:str,cmap=None,normalize=False):
    """Draw a confusion matrix as a heat map and save it as ``<title>.png``.

    The image is written into the module-level ``confusion_path`` directory
    (created if missing) and the current figure is cleared afterwards.

    Parameters
    ----------
    con_mat : array-like
        Square confusion matrix. Row ``i`` is labelled as true class
        ``labels[i]`` and column ``j`` as predicted class ``labels[j]``.
    labels : sequence
        Class names, in the same order as the rows/columns of ``con_mat``.
    title : str
        Plot title; also used as the file name (without extension).
    cmap : optional
        Colormap handed to ``plt.imshow``. ``None`` (default) selects 'Blues'.
    normalize : bool, default False
        If True, annotate each cell with its percentage of the row total
        instead of the raw count.
    """
    con_mat = np.asarray(con_mat)
    if cmap is None:
        # Resolved at call time; plt.cm.get_cmap is deprecated/removed in
        # recent matplotlib releases, plt.get_cmap is the supported accessor.
        cmap = plt.get_cmap('Blues')

    # Number of samples per true class (one total per row).
    row_totals = con_mat.sum(axis=1)

    plt.imshow(con_mat, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    marks = np.arange(len(labels))
    # y tick labels additionally show how many samples each true class has.
    nlabels = ['{0}(n={1})'.format(labels[k], row_totals[k]) for k in range(len(con_mat))]
    plt.xticks(marks, labels)
    plt.yticks(marks, nlabels)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = con_mat.max() / 2.
    for i, j in itertools.product(range(con_mat.shape[0]), range(con_mat.shape[1])):
        if normalize:
            # Percentage within the cell's own row; an empty row shows 0%.
            share = con_mat[i, j] * 100 / row_totals[i] if row_totals[i] else 0.0
            cell_text = '{0:.1f}%'.format(share)
        else:
            cell_text = con_mat[i, j]
        plt.text(j, i, cell_text, horizontalalignment="center",
                 color="white" if con_mat[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are not clipped.
    plt.tight_layout()

    # Save the image, then clear the figure so the next plot starts clean.
    os.makedirs(confusion_path, exist_ok=True)
    plt.savefig(os.path.join(confusion_path, title + '.png'), facecolor='#eeeeee')
    plt.clf()

In [None]:
# Train and evaluate every model on the same hold-out split, save one
# confusion-matrix image per model and write all scores to a single CSV.

# Fix the class order once: confusion_matrix is asked for exactly this order,
# so the tick labels in the plot always match its rows and columns
# (a bare set() has no guaranteed iteration order).
class_labels = sorted(set(binary_target))

# Make sure the final to_csv cannot fail on a missing directory after all
# models have been trained.
os.makedirs(eval_path, exist_ok=True)

for name, model in models:
    # binary classification: wrap the estimator with the project's
    # create_pipeline helper (imported from the local `pipeline` module).
    binary_model = create_pipeline(model)

    print('training start...')
    binary_model.fit(X_train, y_train)

    # evaluation
    print('evaluation start...')
    binary_pred = binary_model.predict(X_test)

    acc = accuracy_score(y_test, binary_pred)
    f1_mi = f1_score(y_test, binary_pred, average='micro')
    f1_ma = f1_score(y_test, binary_pred, average='macro')
    f1_we = f1_score(y_test, binary_pred, average='weighted')
    recall_mi = recall_score(y_test, binary_pred, average='micro')
    recall_ma = recall_score(y_test, binary_pred, average='macro')
    recall_we = recall_score(y_test, binary_pred, average='weighted')
    precision_mi = precision_score(y_test, binary_pred, average='micro')
    precision_ma = precision_score(y_test, binary_pred, average='macro')
    precision_we = precision_score(y_test, binary_pred, average='weighted')

    # One result row, in the same order as the columns of `df`.
    model_eval = [name, acc,
                  f1_mi, f1_ma, f1_we,
                  recall_mi, recall_ma, recall_we,
                  precision_mi, precision_ma, precision_we]

    # confusion matrix, saved as an image named after the model
    confusion = metrics.confusion_matrix(y_test, binary_pred, labels=class_labels)
    plot_confusion_matrix(confusion, labels=class_labels, title=name)

    print(f'name:{name},acc:{acc},f1_score:{f1_mi},{f1_ma},{f1_we},recall:{recall_mi},{recall_ma},{recall_we},precision:{precision_mi},{precision_ma},{precision_we}')
    df.loc[cnt] = model_eval
    cnt += 1

df.to_csv(os.path.join(eval_path,'CICI_binary.csv'),index=False)


name:RF,acc:0.9707990845352286,f1_score:0.9707990845352286,0.9607043462713616,0.9703237425558238,recall:0.9707990845352286,0.9468811910272082,0.9707990845352286,precision:0.9707990845352286,0.9769821587948793,0.9713551966346159
name:CART,acc:0.9612225565327313,f1_score:0.9612225565327313,0.9504394823883832,0.9616047011925579,recall:0.9612225565327313,0.9598379425409532,0.9612225565327313,precision:0.9612225565327313,0.9420557354817002,0.962630988023668
name:NB,acc:0.7739617958810266,f1_score:0.7739617958810266,0.7563268743151864,0.7879874355499022,recall:0.7739617958810266,0.8399698145967354,0.7739617958810266,precision:0.7739617958810266,0.7614581898477408,0.8711396156195855
name:LDA,acc:0.9625731915939194,f1_score:0.9625731915939194,0.9509122664180607,0.9624675609756216,recall:0.9625731915939194,0.948405421716106,0.9625731915939194,precision:0.9625731915939194,0.9534964724620768,0.9624102247120576
name:QDA,acc:0.9650256605208135,f1_score:0.9650256605208135,0.9558009082775571,0.965553

In [None]:
# Optional: tile the saved confusion-matrix plots into one overview image.
from PIL import Image

GRID_COLS = 3            # number of tiles per row
TILE_SIZE = (400, 300)   # (width, height) of every tile in pixels

# File names of the per-model plots. Only .png files are used, which also
# keeps the merged .jpg from being picked up when this cell is re-run;
# sorting gives a stable tile order.
img_files = sorted(f for f in os.listdir(confusion_path) if f.lower().endswith('.png'))
if not img_files:
    raise FileNotFoundError(f'no .png images found in {confusion_path}')

images = []  # resized tiles
for img_file in img_files:
    # os.listdir returns bare names, so the directory has to be joined back on.
    with Image.open(os.path.join(confusion_path, img_file)) as img:
        # resize() returns a new image instead of modifying `img` in place.
        images.append(img.resize(TILE_SIZE))

# Canvas size: GRID_COLS tiles wide and as many rows as the images need.
width, height = TILE_SIZE
n_rows = -(-len(images) // GRID_COLS)  # ceiling division
new_width = width * GRID_COLS
new_height = height * n_rows
new_img = Image.new('RGB', (new_width, new_height))

# Paste the tiles row by row, left to right.
for idx, img in enumerate(images):
    row, col = divmod(idx, GRID_COLS)
    new_img.paste(img, (col * width, row * height))

new_img.save(os.path.join(confusion_path, 'merged_image.jpg'))