In [2]:
import numpy as np
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
%matplotlib inline

In [None]:
# Read the CSV file.
# fig_size is the (width, height) in inches reused by the figures below.
fig_size = (10, 6)

# NOTE(review): absolute, machine-specific path; consider a configurable
# data directory so the notebook runs on other machines.
dataframe = pd.read_csv('/Users/Matheus Carvalho/Downloads/dataset3.csv', header=[0])  # load dataset

# Columns discarded before the analysis. What remains is the "'Node Status'"
# label plus the columns that the rest of the notebook scales and correlates.
dropped_columns = [
    "Node",
    "Class",
    "'Flood Status'",
    'Average_Delay_Time_Per_Sec',
    'Percentage_Of_Lost_Pcaket_Rate',
    'Percentage_Of_Lost_Byte_Rate',
    '10-Run-AVG-Drop-Rate',
    '10-Run-AVG-Bandwith-Use',
    '10-Run-Delay',
    "'Packet Size_Byte'",
]
final_dataframe = dataframe.drop(columns=dropped_columns)

# Short names used as tick labels on the correlation heatmaps.
parametros1 = ['UBR', 'PDR', 'FB', 'PRR', 'UB', 'LB', 'PT', 'PR', 'PL', 'TB', 'RB']
# Values of the "'Node Status'" column that the notebook iterates over.
class_names = ['B', "'P NB'", 'NB']

# Data normalization
# Separate the label from the features *by name*. The previous version took
# the column labels from `final_dataframe.columns[:-1]`, which silently
# mislabels the scaled features whenever "'Node Status'" is not the last
# column of the CSV.
label_column = "'Node Status'"
feature_frame = final_dataframe.drop(columns=[label_column])
y = final_dataframe[label_column]

# Fit the scaler on the feature columns and transform those same columns.
scaler = preprocessing.StandardScaler().fit(feature_frame)
x_scaled = scaler.transform(feature_frame)

# Scaled features keep the original row index and feature names; the
# untouched label column is appended last.
scaled_final_dataframe = pd.DataFrame(x_scaled, index=feature_frame.index, columns=feature_frame.columns)
scaled_final_dataframe[label_column] = y

# PCA
# Project the scaled features onto two principal components and attach the
# node status so the scatter plot can be coloured per class.
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x_scaled)
principalDf = pd.DataFrame(data=principalComponents,
                           columns=['principal component 1', 'principal component 2'])
finalDf = pd.concat([principalDf, dataframe[["'Node Status'"]]], axis=1)

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']  # currently unused
markers = ['o', '*', 'v']

# sns.lmplot is a figure-level function and creates its own figure, so no
# plt.figure() call is made here (it would only leave an empty figure behind).
# hue_order pins the class order so that the positional labels handed to
# plt.legend() below are attached to the matching markers; without it the
# order follows the order of appearance in the data.
# legend_out was dropped: it has no effect while legend=False and only
# triggers a deprecation warning on recent seaborn releases.
sns.lmplot(x='principal component 1',
           y='principal component 2',
           data=finalDf,
           fit_reg=False,
           hue="'Node Status'",
           hue_order=class_names,
           markers=markers,
           legend=False,
           height=fig_size[1],
           aspect=fig_size[0] / fig_size[1])

plt.legend(class_names, loc=9)
plt.xlabel('Principal Component 1', fontsize=15)
plt.ylabel('Principal Component 2', fontsize=15)
plt.tight_layout()


# Correlation map
# Correlate the scaled features only. The string-valued "'Node Status'"
# column is dropped explicitly because DataFrame.corr() no longer skips
# non-numeric columns silently on pandas >= 2.0 (it raises instead).
corr = scaled_final_dataframe.drop(columns=["'Node Status'"]).corr()

# Boolean mask hiding the upper triangle (diagonal included) so each pair is
# shown once. The same mask is reused by the per-scenario heatmaps below.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

plt.figure(figsize=fig_size)
sns.heatmap(corr, linewidths=.5, mask=mask, xticklabels=parametros1, yticklabels=parametros1,
            linecolor='white', cmap="RdBu_r", annot=True)
plt.tight_layout()
plt.show()

# Correlation map for each scenario
# Rows are selected with a boolean filter on the feature-only frame; this is
# equivalent to the former `.where(...)` (NaN rows are ignored by corr) but
# keeps the string label column out of corr(), which raises on pandas >= 2.0.
scenario_labels = dataframe["'Node Status'"]
scenario_features = scaled_final_dataframe.drop(columns=["'Node Status'"])

for attack in class_names:
    corr = scenario_features[scenario_labels == attack].corr()

    plt.figure(figsize=fig_size)
    sns.heatmap(corr, linewidths=.5, mask=mask, xticklabels=parametros1, yticklabels=parametros1,
                linecolor='white', cmap="RdBu_r", annot=True)
    plt.title('Correlation only for the ' + attack + ' scenario')

    # Finish each figure the same way the other heatmap sections do.
    plt.tight_layout()
    plt.show()

# Map of the correlation differences between the attack scenarios
# For every class, plot (class correlation - benign correlation). The first
# class in class_names is the benign one itself, so its map is all zeros.
# The label column is dropped before corr() because pandas >= 2.0 raises on
# non-numeric columns instead of skipping them.
difference_labels = dataframe["'Node Status'"]
difference_features = scaled_final_dataframe.drop(columns=["'Node Status'"])

no_attack_corr = difference_features[difference_labels == 'B'].corr()
for attack in class_names:
    corr = difference_features[difference_labels == attack].corr()

    plt.figure(figsize=fig_size)
    sns.heatmap(corr - no_attack_corr, linewidths=.5, mask=mask, xticklabels=parametros1,
                yticklabels=parametros1, linecolor='white', annot=True, cmap="RdBu_r")
    plt.title('Difference from ' + class_names[0] + ' to ' + attack + ' attack')

    plt.tight_layout()
    plt.show()

# Correlation timeline
#
# Builds a synthetic monitoring sequence (samples_per_attack rows per class,
# in class_names order) and tracks two pairwise correlations over a sliding
# window of `step` samples.

steps = [20, 30]          # sliding-window lengths, in samples
samples_per_attack = 60   # rows drawn for each class

status_column = "'Node Status'"

# Draw the rows per class in one call, with replacement, which matches the
# former loop of independent `sample(n=1)` draws. Concatenating frames (rather
# than collecting `.values[0]` rows) keeps the feature columns numeric, so
# corr() can actually use them. A fixed seed makes the timeline reproducible.
sampling_rng = np.random.RandomState(42)
sampled_parts = [
    scaled_final_dataframe[scaled_final_dataframe[status_column] == att]
    .sample(n=samples_per_attack, replace=True, random_state=sampling_rng)
    for att in class_names
]

# Ordered sample sequence (features + label); also consumed by the DBSCAN
# section further down.
correlation_dataframe2 = pd.concat(sampled_parts, ignore_index=True)
X = correlation_dataframe2.values

timeline_features = correlation_dataframe2.drop(columns=[status_column])
timeline_status = correlation_dataframe2[status_column].values

# Overall correlation matrix of the sampled features.
correlation_dataframe = timeline_features.corr()

correlations = {}
attack_change = {}

# The windows must slide over the *samples* (correlation_dataframe2). The
# previous code iterated over `correlation_dataframe`, i.e. the correlation
# matrix itself, which has no "'Node Status'" column and far fewer rows than
# a window, so no timeline was ever produced.
for step in steps:
    correlations[step] = {'PL<>PDR': [], 'PL<>PRR': []}
    attack_change[step] = []
    last_attack = "B"
    for start in range(0, len(correlation_dataframe2) - step):
        # Flag a class change at the sample that follows the current window.
        current_attack = timeline_status[start + step]
        attack_change[step].append(current_attack != last_attack)
        last_attack = current_attack

        # Correlation inside the window [start, start + step); a column that
        # is constant within the window yields NaN.
        corr = timeline_features.iloc[start:start + step].corr()
        correlations[step]['PL<>PDR'].append(corr['Packet_lost']["'Packet Drop Rate'"])
        correlations[step]['PL<>PRR'].append(corr['Packet_lost']["'Packet Received  Rate'"])

# One figure per window size: both correlation series over the sample index,
# with a red dotted vertical line wherever the class changes.
for step in steps:
    plt.figure(figsize=fig_size)
    pdr_series = correlations[step]['PL<>PDR']
    prr_series = correlations[step]['PL<>PRR']

    # x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
    # positionally, and the keyword form also works on older releases.
    ax1 = sns.lineplot(x=np.arange(len(pdr_series)), y=pdr_series, label='PL / PDR')
    ax2 = sns.lineplot(x=np.arange(len(prr_series)), y=prr_series, label='PL / PRR')

    # Neither call is given an `ax`, so both curves are expected on the same
    # axes: lines[0] is the PDR curve and lines[1] the PRR curve.
    ax2.lines[0].set_linestyle(":")
    ax2.lines[0].set_marker("o")
    ax2.lines[0].set_markevery(10)
    ax2.lines[1].set_linestyle("--")
    ax2.lines[1].set_marker("s")
    ax2.lines[1].set_markevery(10)

    for x, att in enumerate(attack_change[step]):
        if att:
            plt.axvline(x, color='red', linestyle=':')
    plt.xlabel('Number of monitoring samples')
    plt.ylabel('Correlation')
    plt.legend()
    plt.show()

# Using DBSCAN
#
# Fit DBSCAN on the benign ('B') rows of the sampled timeline frame for every
# (min_samples, eps) pair and record the percentage of rows labelled as noise.
# (The earlier assignments from scaled_final_dataframe were overwritten
# immediately, so only the effective ones are kept.)
benign_rows = correlation_dataframe2[correlation_dataframe2["'Node Status'"] == 'B']
X = benign_rows.drop("'Node Status'", axis=1).values
labels_true = benign_rows["'Node Status'"].values

# NOTE(review): 1. appears twice in the epsilon list; possibly a typo for
# another value. Left unchanged; confirm the intended grid.
epsilon_configurations = [.1, .5, 1., 1., 2., 3., 4.]
min_samples_configurations = [1, 3, 5, 8, 10, 12, 15, 20]

grid_shape = (len(min_samples_configurations), len(epsilon_configurations))

# values[i, j]: percentage of rows DBSCAN labels -1 (noise) for
# min_samples_configurations[i] and epsilon_configurations[j].
values = np.zeros(grid_shape)
# predictors[i, j]: the estimator fitted for that same configuration.
predictors = np.empty(grid_shape, dtype=object)

for idm, min_samples in enumerate(min_samples_configurations):
    for ide, epsilon in enumerate(epsilon_configurations):
        dbscan = DBSCAN(eps=epsilon, min_samples=min_samples, metric='euclidean')
        labels = dbscan.fit(X).labels_
        values[idm, ide] = np.count_nonzero(labels == -1) * 100 / len(X)
        predictors[idm, ide] = dbscan

# Heatmap of the noise percentage stored in `values`:
# rows are the min_samples settings, columns the eps settings.
plt.figure(figsize=fig_size)
sns.heatmap(values, linewidths=.5, linecolor='white', annot=True, cmap="RdBu_r", cbar=False, fmt='.2f')

# Ticks are placed at cell centres (index + 0.5) and labelled with the
# actual parameter values.
plt.ylabel('$MinPts$')
plt.yticks([x + .5 for x in range(0, len(min_samples_configurations))], min_samples_configurations)

# Raw string: '\e' is not a valid escape sequence and triggers a warning on
# current Python versions; the resulting label text is unchanged.
plt.xlabel(r'$\epsilon$')
plt.xticks([x + .5 for x in range(0, len(epsilon_configurations))], epsilon_configurations)

plt.tight_layout()


###########################
# Evaluate every stored DBSCAN configuration per class: all benign rows plus
# the first 10 rows of the class under test are clustered together, and the
# share of benign rows flagged as noise (fp) and of non-benign rows that were
# assigned to a cluster (fn) is printed as a percentage of all rows.
#
# The rows come from the *scaled* frame: the eps grid above was explored on
# standardized features, so running the same eps values on the raw
# `final_dataframe` columns would compare distances on a different scale.
eval_status = scaled_final_dataframe["'Node Status'"]
eval_features = scaled_final_dataframe.drop("'Node Status'", axis=1)

# The benign part does not depend on the class under test; compute it once.
benign_selector = eval_status == 'B'
X1 = eval_features[benign_selector].values
Y1 = eval_status[benign_selector].values

for attack in class_names:
    print('\nresults for attack', attack)

    attack_selector = eval_status == attack
    X2 = eval_features[attack_selector].values[:10]
    Y2 = eval_status[attack_selector].values[:10]

    X = np.concatenate((X1, X2), axis=0)
    labels_true = np.concatenate((Y1, Y2), axis=0)
    is_benign = labels_true == 'B'

    # Table header: one column per eps value, then the meaning of each cell.
    for epsilon in epsilon_configurations:
        print('\t', epsilon, end='\t')
    print('')

    for ide, epsilon in enumerate(epsilon_configurations):
        print('\t(fp, fn) %', end='')
    print('')

    for idm, min_samples in enumerate(min_samples_configurations):
        print(min_samples, end='\t')

        for ide, epsilon in enumerate(epsilon_configurations):

            # Re-fits the stored estimator on this class' data set.
            labels = predictors[idm, ide].fit_predict(X)

            # Label -1 is DBSCAN's noise label, treated here as "anomalous".
            false_positives = np.sum((labels == -1) & is_benign)
            false_negatives = np.sum((labels > -1) & ~is_benign)

            print('%0.2f,' % (false_positives * 100 / len(X)), '%0.2f' % (false_negatives * 100 / len(X)), end='\t')
        print('')

plt.show()