In [None]:
import tensorflow.compat.v1 as v1
# Turn off eager execution before any model is built in this notebook.
# NOTE(review): presumably required by the ART KerasClassifier used further down — confirm.
v1.disable_eager_execution()

In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from scipy.stats import zscore

def srcprt_range(prt):
    """Return the 5000-wide bucket number of a record's source port.

    `prt` is any object indexable with the key 'prt_src' (a DataFrame row,
    a dict, ...). Values below 5000 give 1, below 10000 give 2, and so on up
    to 12 for values below 60000; anything else gives 13.
    """
    port = prt['prt_src']
    # Exclusive upper bounds 5000, 10000, ..., 60000: the first bound that
    # exceeds the port decides the bucket.
    for bucket, upper_bound in enumerate(range(5000, 60001, 5000), start=1):
        if port < upper_bound:
            return bucket
    return 13

def dstprt_range(prt):
    """Return the 5000-wide bucket number of a record's destination port.

    `prt` is any object indexable with the key 'prt_dst'. Buckets are numbered
    1 (value < 5000) through 12 (value < 60000); every other value maps to 13.
    """
    port = prt['prt_dst']
    upper_bounds = range(5000, 60001, 5000)  # 5000, 10000, ..., 60000 (exclusive limits)
    return next(
        (bucket for bucket, bound in enumerate(upper_bounds, start=1) if port < bound),
        13,  # no limit exceeded the port
    )

def def_model(input_dim, outputlayer_neurons):
    """Build and compile the dense network used throughout the notebook.

    input_dim: number of input features fed to the first layer.
    outputlayer_neurons: size of the softmax output layer.
    Returns the compiled Sequential model (categorical cross-entropy loss,
    Adam optimizer).
    """
    layers = [
        Dense(50, input_dim=input_dim, activation='relu'),  # Hidden 1
        Dense(25, activation='relu'),  # Hidden 2
        Dense(outputlayer_neurons, activation='softmax'),  # Output
    ]
    network = Sequential(layers)
    network.compile(loss='categorical_crossentropy', optimizer='adam')
    return network

        
def train_model(train_dataframe, classification_type, verbose=None):
    """Train the dense classifier on a labelled DataFrame and return it.

    Parameters
    ----------
    train_dataframe : pandas.DataFrame
        Must contain an 'is_attack' column. A 'multiclass' column is excluded
        from the features when present and is required when
        classification_type is not 1. Every other column is used as a feature.
    classification_type : int
        1 selects 'is_attack' as the target; any other value selects
        'multiclass'.
    verbose : bool, optional
        Print progress messages. When left as None the notebook-level
        `verbose` variable is used if it exists, otherwise False. (The
        function used to read that global unconditionally and raised
        NameError when it had not been defined.)

    Returns
    -------
    The fitted Keras model; early stopping restores the weights of the best
    validation loss.
    """
    if verbose is None:
        # Backward compatible with callers that only set the global flag.
        verbose = globals().get('verbose', False)

    if verbose:
        print('Test train splitting finished')

    # Neither label column may end up among the features.
    x_columns = train_dataframe.columns.drop('is_attack')
    if 'multiclass' in train_dataframe.columns:
        x_columns = x_columns.drop('multiclass')

    print('X columns train')
    print(x_columns)

    target_column = 'is_attack' if classification_type == 1 else 'multiclass'

    print('target test')
    print(target_column)

    # Build the feature matrix and the one-hot target once and reuse them for
    # both the debug output and the split.
    features = train_dataframe[x_columns].values
    targets = pd.get_dummies(train_dataframe[target_column]).values

    print('x split')
    print(features)

    print('y split')
    print(targets)

    if verbose:
        print('Train validation splitting started')

    x_train, x_validation, y_train, y_validation = train_test_split(
        features, targets, test_size=0.20, random_state=42)

    # Drop the local references to the full arrays before training.
    del features, targets

    if verbose:
        print('Dataset splitting finished')

    model = def_model(x_train.shape[1], y_train.shape[1])

    # Stop once val_loss has not improved by 1e-3 for 10 epochs.
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=10,
        verbose=1, mode='auto', restore_best_weights=True)
    model.fit(x_train, y_train, validation_data=(x_validation, y_validation),
        callbacks=[monitor], verbose=2, epochs=1000)

    return model

In [None]:
from tensorflow.keras import models
    
def compute_metrics(pred, y_test):
    """Print accuracy and weighted recall, precision and F1 score.

    Both arguments are reduced to class indices with an argmax over axis 1,
    so each is expected to hold one row of per-class values per sample.
    Nothing is returned.
    """
    predicted = np.argmax(pred, axis=1)
    expected = np.argmax(y_test, axis=1)

    print(f"Accuracy: {metrics.accuracy_score(expected, predicted)}")

    # The remaining scores all use the same 'weighted' averaging.
    weighted_scorers = (
        ("Recall", metrics.recall_score),
        ("Precision", metrics.precision_score),
        ("F1Score", metrics.f1_score),
    )
    for label, scorer in weighted_scorers:
        print(f"{label}: {scorer(expected, predicted, average='weighted')}")
    
def test_data(df, classification_type, model):
    """Evaluate `model` on a labelled DataFrame and print the metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'is_attack' column; a 'multiclass' column is excluded
        from the features when present and is required when
        classification_type is not 1. All columns are cast to int.
    classification_type : int
        1 evaluates against 'is_attack'; any other value evaluates against
        'multiclass' (same convention as train_model). The argument used to
        be ignored.
    model :
        Object with a `predict` method returning per-class scores.
    """
    target_column = 'is_attack' if classification_type == 1 else 'multiclass'

    # Keep both label columns out of the features, mirroring train_model, so
    # the feature matrix has the columns the model was trained on.
    x_columns = df.columns.drop('is_attack')
    if 'multiclass' in df.columns:
        x_columns = x_columns.drop('multiclass')

    df = df.astype(int)

    x_test = df[x_columns].values
    y_test = pd.get_dummies(df[target_column]).values

    pred = model.predict(x_test)

    compute_metrics(pred, y_test)
    

In [None]:
def read_data(verbose = False, path = 'C:/Dados/Nuno/MyDataset/MyDataset.txt'):
    """Load the tab-separated dataset and return a preprocessed DataFrame.

    Processing performed:
      * rows with missing values are dropped;
      * 'detailed-label' and a fixed list of unused columns are dropped;
      * only columns that are neither constant nor (almost) unique per row
        are kept;
      * the source / destination ports are replaced by one-hot 5000-wide
        port-range columns (`src_port_range_*`, `dst_port_range_*`);
      * protocol names tcp / udp / icmp are mapped to 6 / 17 / 1 and one-hot
        encoded as `proto_id_*`;
      * '-' is replaced by 0 in the byte and packet counters;
      * the binary target `is_attack` (1 where label == 'Malicious') is added
        and `label` is dropped.

    Parameters
    ----------
    verbose : bool
        Accepted for backward compatibility; currently not used.
    path : str
        Location of the dataset file. Defaults to the path that used to be
        hard-coded in the function.

    Returns
    -------
    pandas.DataFrame
        The preprocessed frame.
    """
    import datetime

    system_time = datetime.datetime.now()
    print(system_time.strftime("\nSTART  %H:%M:%S"))

    # Importing the dataset
    df = pd.read_csv(path, sep='\x09', header=0)
    df.dropna(inplace=True)

    df = df.drop(columns='detailed-label')

    unused_columns = ['id.orig_h','id.resp_h','service','duration','conn_state','history','missed_bytes','orig_ip_bytes','resp_ip_bytes','tunnel_parents']
    df = df.drop(columns=unused_columns)

    # Keep the columns that are not constant and not all different.
    n_rows = df.shape[0]
    ft_col = [col for col in df.columns if 1 < len(df[col].unique()) < n_rows - 1]

    df = df[ft_col]
    df.info()

    # Bucket the ports column-wise (instead of a row-wise DataFrame.apply with
    # temporary columns) and swap the raw ports for the one-hot buckets.
    src_bucket = df['id.orig_p'].map(lambda port: srcprt_range({'prt_src': port}))
    dst_bucket = df['id.resp_p'].map(lambda port: dstprt_range({'prt_dst': port}))
    df = pd.concat(
        [
            df.drop(columns=['id.orig_p', 'id.resp_p']),
            pd.get_dummies(src_bucket, prefix="src_port_range"),
            pd.get_dummies(dst_bucket, prefix="dst_port_range"),
        ],
        axis=1,
    )

    # Protocol names -> numeric ids.
    df['proto'] = np.where(df['proto']=='tcp', 6, df['proto'])
    df['proto'] = np.where(df['proto']=='udp', 17, df['proto'])
    df['proto'] = np.where(df['proto']=='icmp', 1, df['proto'])

    # '-' marks a missing counter; treat it as 0.
    df['orig_bytes'] = np.where(df['orig_bytes']=='-', 0, df['orig_bytes'])
    df['resp_bytes'] = np.where(df['resp_bytes']=='-', 0, df['resp_bytes'])

    df['orig_bytes'] = df['orig_bytes'].astype(float)
    df['resp_bytes'] = df['resp_bytes'].astype(float)

    df['orig_pkts'] = np.where(df['orig_pkts']=='-', 0, df['orig_pkts'])
    df['resp_pkts'] = np.where(df['resp_pkts']=='-', 0, df['resp_pkts'])

    # NOTE(review): in the patterns 'orig_^' / 'resp_^' the '^' anchor comes
    # after literal text, so they appear to match no column name and these two
    # lines z-score nothing. They are kept unchanged so the produced features
    # stay the same; switching to '^orig_' / '^resp_' would enable the
    # scaling, but the `astype(int)` casts applied to this frame elsewhere in
    # the notebook would then truncate the scores, so that fix needs a
    # coordinated change.
    df[df.filter(regex='orig_^',axis=1).head().columns] = zscore(df[df.filter(regex='orig_^',axis=1).head().columns])
    df[df.filter(regex='resp_^',axis=1).head().columns] = zscore(df[df.filter(regex='resp_^',axis=1).head().columns])

    # Binary target: 1 for rows labelled 'Malicious', 0 otherwise.
    df['is_attack'] = np.where(df['label']=='Malicious', 1, 0)
    df.drop('label', axis=1, inplace=True)

    df = pd.concat([df,pd.get_dummies(df['proto'],prefix="proto_id")],axis=1)
    df.drop('proto', axis=1, inplace=True)

    df.info()

    return df

In [None]:
# Read the data and preprocess the dataset
dfN = read_data()


In [None]:
# Print the preprocessed DataFrame for a quick visual check
print(dfN)

In [None]:
# Split the dataset into train and test parts, then train the model

verbose = True  # read as a global by train_model to enable its progress messages
dfN = dfN.astype(int)  # cast every column to int
# Hold out a reproducible 20% sample for testing; the remaining rows are used for training
dfN_test_dataframe = dfN.sample(frac=0.2, random_state=1337)
dfN_train_dataframe = dfN.drop(dfN_test_dataframe.index)

model = train_model(dfN_train_dataframe, 1)  # 1 -> binary target 'is_attack'


In [None]:
# Check the model on the held-out test data

test_data(dfN_test_dataframe,1, model)


In [None]:
# Use GANGenerator to generate data
from tabgan.sampler import OriginalGenerator, GANGenerator
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# Separate features and the 'is_attack' label for the train and test frames
df_x_train = dfN_train_dataframe.drop(columns=['is_attack'])
df_x_test = dfN_test_dataframe.drop(columns=['is_attack'])
df_y_train = dfN_train_dataframe[['is_attack']]
df_y_test = dfN_test_dataframe[['is_attack']]

# Generate features (gen_x) and labels (gen_y) from the train/test frames
gen_x, gen_y = GANGenerator().generate_data_pipe(df_x_train, df_y_train,
                                          df_x_test, deep_copy=True, only_adversarial=True, use_adversarial=True)


In [None]:
# Test the model with the generated data.
# Attach the whole generated label vector to the generated features. The
# previous `gen_y[1]` selected a single entry of gen_y instead of one label per
# generated row.
# NOTE(review): assumes gen_y is a Series / one-column frame with one label per
# gen_x row — confirm against the installed tabgan version.
gen_x['is_attack'] = np.asarray(gen_y).ravel()
test_data(gen_x,1,model)


In [None]:
# Using the adversarial-robustness-toolbox (ART)

from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import KerasClassifier

In [None]:
# Create the ART classifier

from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import KerasClassifier
from keras.utils import to_categorical

df_x_train = dfN_train_dataframe.drop(columns=['is_attack'])
# NOTE(review): 'is_attack' was cast to int earlier, so get_dummies on this
# one-column frame presumably leaves it unchanged and the one-hot encoding is
# effectively done by to_categorical — confirm.
df_y_train = to_categorical(pd.get_dummies(dfN_train_dataframe[['is_attack']]).values)

# Fresh, untrained network built by the same def_model helper as `model`
new_model = def_model(df_x_train.shape[1], df_y_train.shape[1])

# Wrap the Keras model in an ART estimator
classifier = KerasClassifier(model=new_model)


In [None]:
# Train the classifier
classifier.fit(df_x_train, df_y_train, nb_epochs=20)

# Generate predictions for the test data
predictions = classifier.predict(df_x_test)

# compute_metrics takes an argmax over axis 1 of its second argument, so it
# needs one-hot rows. Passing the single-column df_y_test frame made every
# expected class 0.
y_test_onehot = to_categorical(df_y_test['is_attack'].values, num_classes=predictions.shape[1])
compute_metrics(predictions, y_test_onehot)



In [None]:
# Generate adversarial data with FastGradientMethod

attack = FastGradientMethod(estimator=classifier, eps=0.2)
x_test_adv = attack.generate(x=df_x_test)


In [None]:
# Original data
print(df_x_test)

# Modified data - generated by FastGradientMethod
print(x_test_adv)

In [None]:

# Evaluate the ART classifier on the data generated by FastGradientMethod
predictions_adv = classifier.predict(x_test_adv)

# compute_metrics takes an argmax over axis 1 of its second argument, so it
# needs one-hot rows. Passing the single-column df_y_test frame made every
# expected class 0.
y_test_onehot = to_categorical(df_y_test['is_attack'].values, num_classes=predictions_adv.shape[1])
compute_metrics(predictions_adv, y_test_onehot)


In [None]:
# Evaluate the original model on the data generated by FastGradientMethod
predictions_adv = model.predict(x_test_adv)

# compute_metrics takes an argmax over axis 1 of its second argument, so it
# needs one-hot rows. Passing the single-column df_y_test frame made every
# expected class 0.
y_test_onehot = to_categorical(df_y_test['is_attack'].values, num_classes=predictions_adv.shape[1])
compute_metrics(predictions_adv, y_test_onehot)
