# CNN1D - TRANSFER LEARNING

In [None]:
# Imports modules and packages
import tensorflow as tf
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras import datasets, layers, models, mixed_precision
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from keras.models import load_model, Sequential
from keras.layers import Dense, Dropout
pd.options.mode.chained_assignment = None  # default='warn'
sns.set() # Set seaborn

### Base 4 Train & Base 4 test

In [None]:
data_train = pd.read_csv('~/04_Semester_4/revisi/csv_files/UNSW_NB15_base4_train.csv', low_memory=False)

In [None]:
# Keep only the flows whose protocol and connection state belong to the supported sets.
kept_protocols = ['tcp', 'udp', 'icmp', 'arp', 'ipv6-icmp', 'igmp', 'rarp']
kept_states = ['RST', 'REQ', 'INT', 'FIN', 'CON', 'ECO', 'ACC', 'PAR']
data_train = data_train[data_train['proto'].isin(kept_protocols)]
data_train = data_train[data_train['state'].isin(kept_states)]

# The 'label' column is kept as a one-column DataFrame.
data_labels = data_train[['label']]

# Only these thirteen columns are used as model inputs; all other columns are left out.
feature_columns = ['proto', 'srcip', 'sport', 'dstip', 'dsport', 'spkts', 'dpkts',
                   'sbytes', 'dbytes', 'state', 'stime', 'ltime', 'dur']
data_features = data_train[feature_columns]

In [None]:
def _ip_tail_as_hex(address):
    """Parse the text after the last '.' and the last ':' of an address as a base-16 integer."""
    return int(address.split(".")[-1].split(":")[-1], 16)


def _port_as_hex(raw_port, dash_is_missing=False):
    """Convert one raw port value to an int; purely alphabetic text becomes -1."""
    cleaned = raw_port.replace('0x', '') if "0x" in str(raw_port) else raw_port
    cleaned = str(cleaned)
    # Ports that were read as floats carry a trailing '.0'.
    if cleaned[-2:] == '.0':
        cleaned = cleaned[:-2]
    if dash_is_missing and cleaned == "-":
        cleaned = "None"
    # The remaining text is parsed as hexadecimal, as it is throughout this notebook.
    return -1 if cleaned.isalpha() else int(cleaned, 16)


# Source and destination addresses are each reduced to a single integer.
for ip_column in ('srcip', 'dstip'):
    data_features[ip_column] = data_features[ip_column].apply(_ip_tail_as_hex)

# Only the destination port maps '-' to the missing marker.
data_features['sport'] = data_features['sport'].apply(lambda port: _port_as_hex(port))
data_features['dsport'] = data_features['dsport'].apply(
    lambda port: _port_as_hex(port, dash_is_missing=True))

# Compress the heavy-tailed columns with log(1 + x).
log1p_col = ['dur', 'sbytes', 'dbytes', 'spkts']
for column in log1p_col:
    data_features[column] = np.log1p(data_features[column])

# One-hot encode the remaining categorical columns.
data_features = pd.get_dummies(data_features)

# Standardise every column with a scaler fitted on this frame.
data_features = StandardScaler().fit_transform(data_features)

# Append a trailing axis of length 1 to the scaled feature matrix.
data_features = np.expand_dims(data_features, -1)

x_train1 = data_features
y_train1 = data_labels

In [None]:
print(x_train1.shape)
print(y_train1.shape)
print(y_train1.value_counts())

In [None]:
data = pd.read_csv('~/04_Semester_4/revisi/csv_files/UNSW_NB15_base4_test.csv', low_memory=False)

In [None]:
data.shape

In [None]:
# Keep only the flows whose protocol and connection state belong to the supported sets.
kept_protocols = ['tcp', 'udp', 'icmp', 'arp', 'ipv6-icmp', 'igmp', 'rarp']
kept_states = ['RST', 'REQ', 'INT', 'FIN', 'CON', 'ECO', 'ACC', 'PAR']
data = data[data['proto'].isin(kept_protocols)]
data = data[data['state'].isin(kept_states)]

# The 'label' column is kept as a one-column DataFrame.
data_labels = data[['label']]

# Only these thirteen columns are used as model inputs; all other columns are left out.
feature_columns = ['proto', 'srcip', 'sport', 'dstip', 'dsport', 'spkts', 'dpkts',
                   'sbytes', 'dbytes', 'state', 'stime', 'ltime', 'dur']
data_features = data[feature_columns]

"""PREPROCESSING"""

def _ip_tail_as_hex(address):
    """Parse the text after the last '.' and the last ':' of an address as a base-16 integer."""
    return int(address.split(".")[-1].split(":")[-1], 16)


def _port_as_hex(raw_port, dash_is_missing=False):
    """Convert one raw port value to an int; purely alphabetic text becomes -1."""
    cleaned = raw_port.replace('0x', '') if "0x" in str(raw_port) else raw_port
    cleaned = str(cleaned)
    # Ports that were read as floats carry a trailing '.0'.
    if cleaned[-2:] == '.0':
        cleaned = cleaned[:-2]
    if dash_is_missing and cleaned == "-":
        cleaned = "None"
    return -1 if cleaned.isalpha() else int(cleaned, 16)


# Source and destination addresses are each reduced to a single integer.
for ip_column in ('srcip', 'dstip'):
    data_features[ip_column] = data_features[ip_column].apply(_ip_tail_as_hex)

# Only the destination port maps '-' to the missing marker.
data_features['sport'] = data_features['sport'].apply(lambda port: _port_as_hex(port))
data_features['dsport'] = data_features['dsport'].apply(
    lambda port: _port_as_hex(port, dash_is_missing=True))

# Force the address and port columns to the platform integer type.
for int_column in ('srcip', 'sport', 'dstip', 'dsport'):
    data_features[int_column] = data_features[int_column].astype(int)

# Compress the heavy-tailed columns with log(1 + x).
log1p_col = ['dur', 'sbytes', 'dbytes', 'spkts']
for column in log1p_col:
    data_features[column] = np.log1p(data_features[column])


# One-hot encode the categorical features first, so the placeholder column below can be
# placed where pd.get_dummies itself would have put it.
data_features = pd.get_dummies(data_features)

# The original code added an all-zero 'proto_igmp' column to match the training layout.
# Adding it before encoding left it among the numeric columns (ahead of every 'proto_*'
# indicator) instead of in its sorted position inside the 'proto_*' group.
# NOTE(review): assumes the training frame was encoded with pd.get_dummies on the same
# column list and differs from this one only by 'proto_igmp' - confirm against the data.
if 'proto_igmp' not in data_features.columns:
    data_features['proto_igmp'] = 0
proto_columns = sorted(c for c in data_features.columns if c.startswith('proto_'))
state_columns = sorted(c for c in data_features.columns if c.startswith('state_'))
numeric_columns = [c for c in data_features.columns
                   if not c.startswith(('proto_', 'state_'))]
# get_dummies layout: untouched columns first, then the indicator groups in sorted order.
data_features = data_features[numeric_columns + proto_columns + state_columns]

# 'label' is numeric, so get_dummies leaves its values unchanged and returns a new frame.
data_labels = pd.get_dummies(data_labels)

# Standardise every column with a scaler fitted on this test frame.
# NOTE(review): the training features are scaled by a separate StandardScaler instance;
# consider reusing that fitted scaler here so both sets share the same statistics.
data_features = StandardScaler().fit_transform(data_features)

# Append a trailing axis of length 1 to the scaled feature matrix.
data_features = np.expand_dims(data_features, axis=2)

x_test1, y_test1 = data_features, data_labels

In [None]:
print(x_test1.shape)
print(y_test1.shape)
print(y_test1.value_counts())

In [None]:
# Clear Session and Memory Growth
def clear_session():
    """Reset the Keras backend session and enable memory growth on every visible GPU.

    A RuntimeError raised while configuring the GPUs is printed rather than propagated.
    """
    tf.keras.backend.clear_session()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if not gpus:
        return
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

In [None]:
# Stop training once val_loss has not improved for 5 epochs, and keep the best model on disk.
filepath = '../tmp/checkpoint.model2_tl.keras'
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    save_best_only=True,
)
callbacks = [early_stop, best_checkpoint]

In [None]:
# Model building and definition
def model_cnn():
    """Build and compile the transfer-learning classifier.

    Loads the saved model 'model_iotbot_24_512.keras', freezes it, removes its last
    three layers and stacks a new head on top: Flatten, three Dense/Dropout pairs and
    a single sigmoid output unit.

    Returns:
        The compiled Sequential model (Adam with learning rate 6e-5, binary
        cross-entropy loss, accuracy metric).
    """
    pretrained_model_tl = load_model('model_iotbot_24_512.keras')
    pretrained_model_tl.trainable = False
    # Drop the last three layers of the pretrained model before reusing it.
    for _ in range(3):
        pretrained_model_tl.pop()

    model = Sequential()
    model.add(pretrained_model_tl)
    model.add(layers.Flatten())
    for units, drop_rate in ((512, 0.4), (256, 0.3), (128, 0.2)):
        model.add(Dense(units, activation='relu'))
        model.add(layers.Dropout(drop_rate))
    # Binary classifier output. The original head stopped at the 128-unit ReLU block,
    # so predictions had 128 columns while the labels, the binary cross-entropy loss
    # and the later single-column 'pred_attack' frame all require one probability.
    # The layer is pinned to float32 because this notebook enables the
    # 'mixed_float16' policy before training.
    model.add(Dense(1, activation='sigmoid', dtype='float32'))

    adam = keras.optimizers.Adam(learning_rate=6e-5)
    loss = keras.losses.BinaryCrossentropy()
    model.compile(optimizer=adam, loss=loss, metrics=['accuracy'])
    return model

In [None]:
model_cnn().summary()

In [None]:
# Start from a clean Keras session with GPU memory growth enabled.
clear_session()

# Use mixed precision for everything built from here on.
mixed_precision.set_global_policy('mixed_float16')

# Mirror across GPUs when at least one is visible; otherwise use the default strategy.
strategy = (
    tf.distribute.MirroredStrategy()
    if tf.config.list_physical_devices('GPU')
    else tf.distribute.get_strategy()
)
print("Number of devices: {}".format(strategy.num_replicas_in_sync))

# The model is created inside the strategy scope.
with strategy.scope():
    model = model_cnn()

# The test split is passed as validation data, so val_* metrics refer to the test set.
history = model.fit(
    x_train1,
    y_train1,
    batch_size=2048,
    epochs=30,
    validation_data=(x_test1, y_test1),
    callbacks=callbacks,
)
history1 = history

In [None]:
#Evaluate the model
# 174/174 [=============] - 12s 70ms/step - loss: 0.0338 - accuracy: 0.9889 - val_loss: 0.0300 - val_accuracy: 0.9904, 2e-5
# 174/174 [=============] - 12s 68ms/step - loss: 0.0269 - accuracy: 0.9912 - val_loss: 0.0241 - val_accuracy: 0.9920, 6e-5
# 174/174 [=============] - 12s 69ms/step - loss: 0.0249 - accuracy: 0.9917 - val_loss: 0.0226 - val_accuracy: 0.9922; 1e-4
results1 = model.evaluate(x_test1, y_test1, verbose=1)
print("test loss, test acc:", results1)

In [None]:
# Plot the training and validation loss recorded for each epoch of the latest fit.
train_loss = history.history['loss']
test_loss = history.history['val_loss']
x = [epoch + 1 for epoch in range(len(test_loss))]
ax = plt.gca()
ax.plot(x, test_loss, color='orange', label='Test loss')
ax.plot(x, train_loss, label='Training loss')
ax.legend()
ax.grid(visible=True)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Loss vs. Epoch', weight='bold', fontsize=18)
plt.show()

In [None]:
# Plot the training and validation accuracy recorded for each epoch of the latest fit.
train_acc = history.history['accuracy']
test_acc = history.history['val_accuracy']
x = [epoch + 1 for epoch in range(len(test_acc))]
ax = plt.gca()
ax.plot(x, test_acc, color='orange', label='Test accuracy')
ax.plot(x, train_acc, label='Training accuracy')
ax.legend()
ax.grid(visible=True)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy vs. Epoch', weight='bold', fontsize=18)
plt.show()

In [None]:
# Prediction
#model = load_model('/home/riki/project_dl/finale/model/base5_train_test.keras')
predicted1 = model.predict(x_test1)

In [None]:
# Work on an explicit copy: pd.DataFrame(frame) is not guaranteed to be independent of
# the frame it wraps, and y_test1 is still needed unchanged by the metric cells below.
df1 = y_test1.copy()
# model.predict returns a 2-D array; flatten it so it is stored as one plain column.
df1['predicted'] = np.ravel(predicted1)

In [None]:
cf1 = confusion_matrix(y_test1, np.round(predicted1))

In [None]:
# confusion_matrix puts the true classes on the rows and the predicted classes on the
# columns, ordered by label value (0 first).
class_names = ['Normal', 'Attack']
cmatrix_df1 = pd.DataFrame(cf1, index=class_names, columns=class_names)
ax = sns.heatmap(cmatrix_df1, annot=True, fmt="d", cbar=False, cmap='Blues')
# Title and axis labels are applied after the heatmap is drawn because sns.heatmap
# sets the axis labels itself. Rows are the true classes, so the y axis carries
# 'Attacks' and the x axis 'Predicted Attacks' (the original had them swapped).
ax.set_title('Confusion matrix of the base 4 train & test with TL', weight='bold', fontsize=14)
ax.tick_params(length=0)
ax.set_xlabel('Predicted Attacks')
ax.set_ylabel('Attacks')
plt.show()

In [None]:
# Precision, Recall & F1 Score
class_report1 = classification_report(y_test1, np.round(predicted1))
print(class_report1)

In [None]:
# Draw the per-class scores of the classification report as a heatmap.
class_report1 = classification_report(
    y_test1,
    np.round(predicted1),
    output_dict=True,
    target_names=['Normal', 'Attack'],
)
report_frame1 = pd.DataFrame(class_report1)
# Drop the last row and the last three columns, then put the classes on the rows.
per_class_scores1 = report_frame1.iloc[:-1, :-3].T
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom=False, bottom=False, top=False, labeltop=True)
plt.tick_params(length=0)
sns.heatmap(per_class_scores1, square=True, cbar=False, cmap='Blues', annot=True)
plt.show()

In [None]:
model.save('model/base5_train_test_tl.keras')

In [None]:
# Add a 'normal' indicator next to 'label': 1 where label is 0, otherwise 0.
# This replaces the three-step replace() swap and, unlike DataFrame.insert, does not
# raise when the cell is executed a second time.
# NOTE: data_labels is the same object that was bound to y_test1 earlier, so the new
# column also shows up there.
normal = (data_labels['label'] == 0).astype(int)
data_labels['normal'] = normal

In [None]:
# Rounded predictions, indexed like the filtered test frame so later column
# assignments align row by row.
df1 = pd.DataFrame(np.round(predicted1), index=data.index, columns=['pred_attack'])
df1['pred_normal'] = 1 - df1['pred_attack']
# Ground truth: the label, the attack category and the inverse-label indicator.
df1['attack'], df1['category'] = data['label'], data['attack_cat']
df1['normal'] = data_labels['normal']
df1.head()

In [None]:
df1.value_counts('category')

In [None]:
dff1=df1
df1=pd.get_dummies(df1)
df1=round(df1)

In [None]:
_EVAL_COLUMNS = ['pred_attack', 'pred_normal', 'attack', 'normal']
_PRED_COLUMNS = ['pred_attack', 'pred_normal']
_TRUTH_COLUMNS = ['attack', 'normal']


def _rows_of(category_flag):
    """Return the evaluation columns of the df1 rows whose one-hot flag equals 1."""
    return df1.loc[df1[category_flag] == 1][_EVAL_COLUMNS]


# One frame per traffic category, plus its prediction and ground-truth halves.
normal = _rows_of('category_normal')
normal_pred, normal_check = normal[_PRED_COLUMNS], normal[_TRUTH_COLUMNS]

generic = _rows_of('category_generic')
generic_pred, generic_check = generic[_PRED_COLUMNS], generic[_TRUTH_COLUMNS]

dos = _rows_of('category_dos')
dos_pred, dos_check = dos[_PRED_COLUMNS], dos[_TRUTH_COLUMNS]

reconnaissance = _rows_of('category_reconnaissance')
reconnaissance_pred = reconnaissance[_PRED_COLUMNS]
reconnaissance_check = reconnaissance[_TRUTH_COLUMNS]

In [None]:
# Per-row hit indicator: the product is non-zero only when prediction and truth agree
# on the class that the category belongs to.
countdata = pd.DataFrame()
normal_c = normal['pred_normal'].mul(normal['normal'])
generic_c = generic['pred_attack'].mul(generic['attack'])
reconnaissance_c = reconnaissance['pred_attack'].mul(reconnaissance['attack'])
dos_c = dos['pred_attack'].mul(dos['attack'])

In [None]:
# For every category: *_c0 counts the zero products (missed), *_c1 is the remainder.
normal_c0 = normal_c.eq(0).sum()
normal_c1 = len(normal) - normal_c0

reconnaissance_c0 = reconnaissance_c.eq(0).sum()
reconnaissance_c1 = len(reconnaissance_c) - reconnaissance_c0

generic_c0 = generic_c.eq(0).sum()
generic_c1 = len(generic) - generic_c0

dos_c0 = dos_c.eq(0).sum()
dos_c1 = len(dos) - dos_c0

In [None]:
normal_0, normal_1 = normal_c0*100 / len(normal), normal_c1*100 / len(normal)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(normal_0, normal_1))

In [None]:
generic_0, generic_1 = generic_c0*100 / len(generic), generic_c1*100 / len(generic)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(generic_0, generic_1))

In [None]:
reconnaissance_0, reconnaissance_1 = reconnaissance_c0*100 / len(reconnaissance), reconnaissance_c1*100 / len(reconnaissance)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(reconnaissance_0, reconnaissance_1))

In [None]:
dos_0, dos_1 = dos_c0*100 / len(dos), dos_c1*100 / len(dos)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(dos_0, dos_1))

In [None]:
# One row per traffic category: detection rates followed by the raw sample counts.
summary_rows1 = [
    ("Normal", normal_1, normal_0, normal_c1, normal_c0),
    ("generic", generic_1, generic_0, generic_c1, generic_c0),
    ("reconnaissance", reconnaissance_1, reconnaissance_0, reconnaissance_c1, reconnaissance_c0),
    ("dos", dos_1, dos_0, dos_c1, dos_c0),
]
summary1 = pd.DataFrame(
    summary_rows1,
    columns=["Traffic", "Detected %", "No Detected %", "Detected Samples", "No Detected Samples"],
)

In [None]:
dff1['category_pred'] = np.where(((df1['attack'] == 1) & (df1['pred_attack'] == 1)) | ((df1['normal'] == 1) & (df1['pred_normal'] == 1)), 'Detected', 'No Detected')


In [None]:
data_plot1=dff1[['category', 'category_pred']]

In [None]:
data_plot1

In [None]:
# Bar chart of detected versus undetected samples for every traffic category.
plt.figure(figsize=(15, 5))
sns.set_theme(style="darkgrid")
sns.countplot(
    data=data_plot1,
    x=data_plot1['category'],
    hue=data_plot1['category_pred'],
    palette='CMRmap',
)
plt.title('Deteksi serangan pada Base 4 Test dengan TL', weight='bold', fontsize='18')
plt.xticks(weight='bold', fontsize=12)
plt.show()

In [None]:
summary1

In [None]:
summary1.to_csv('~/04_Semester_4/revisi/csv_files/summary_base5_train_test_tl.csv', index=False)

#04_Semester_4/revisi/model/model_iotbot_10.keras
model.save('../model/base5_train_test_tl.keras')

## Base 4 Train & First Test

In [None]:
# Load base 4 data train
data_train = pd.read_csv('~/04_Semester_4/revisi/csv_files/UNSW_NB15_base4_train.csv', low_memory=False)

In [None]:
# Keep only the flows whose protocol and connection state belong to the supported sets.
kept_protocols = ['tcp', 'udp', 'icmp', 'arp', 'ipv6-icmp', 'igmp', 'rarp']
kept_states = ['RST', 'REQ', 'INT', 'FIN', 'CON', 'ECO', 'ACC', 'PAR']
data_train = data_train[data_train['proto'].isin(kept_protocols)]
data_train = data_train[data_train['state'].isin(kept_states)]

# The 'label' column is kept as a one-column DataFrame.
data_labels = data_train[['label']]

# Only these thirteen columns are used as model inputs; all other columns are left out.
feature_columns = ['proto', 'srcip', 'sport', 'dstip', 'dsport', 'spkts', 'dpkts',
                   'sbytes', 'dbytes', 'state', 'stime', 'ltime', 'dur']
data_features = data_train[feature_columns]

In [None]:
def _ip_tail_as_hex(address):
    """Parse the text after the last '.' and the last ':' of an address as a base-16 integer."""
    return int(address.split(".")[-1].split(":")[-1], 16)


def _port_as_hex(raw_port, dash_is_missing=False):
    """Convert one raw port value to an int; purely alphabetic text becomes -1."""
    cleaned = raw_port.replace('0x', '') if "0x" in str(raw_port) else raw_port
    cleaned = str(cleaned)
    # Ports that were read as floats carry a trailing '.0'.
    if cleaned[-2:] == '.0':
        cleaned = cleaned[:-2]
    if dash_is_missing and cleaned == "-":
        cleaned = "None"
    return -1 if cleaned.isalpha() else int(cleaned, 16)


# Source and destination addresses are each reduced to a single integer.
for ip_column in ('srcip', 'dstip'):
    data_features[ip_column] = data_features[ip_column].apply(_ip_tail_as_hex)

# Only the destination port maps '-' to the missing marker.
data_features['sport'] = data_features['sport'].apply(lambda port: _port_as_hex(port))
data_features['dsport'] = data_features['dsport'].apply(
    lambda port: _port_as_hex(port, dash_is_missing=True))

# Compress the heavy-tailed columns with log(1 + x).
log1p_col = ['dur', 'sbytes', 'dbytes', 'spkts']
for column in log1p_col:
    data_features[column] = np.log1p(data_features[column])

# One-hot encode the remaining categorical columns.
data_features = pd.get_dummies(data_features)

# Standardise every column with a scaler fitted on this frame.
data_features = StandardScaler().fit_transform(data_features)

# Append a trailing axis of length 1 to the scaled feature matrix.
data_features = np.expand_dims(data_features, -1)

In [None]:
# original dataset
x_train2, y_train2 = data_features, data_labels

In [None]:
print(x_train2.shape)
print(y_train2.shape)
print(y_train2.value_counts())

In [None]:
# Load base 5+ test
data = pd.read_csv('~/04_Semester_4/revisi/csv_files/UNSW_NB15_first_test.csv', low_memory=False)

In [None]:
# Keep only the flows whose protocol and connection state belong to the supported sets.
kept_protocols = ['tcp', 'udp', 'icmp', 'arp', 'ipv6-icmp', 'igmp', 'rarp']
kept_states = ['RST', 'REQ', 'INT', 'FIN', 'CON', 'ECO', 'ACC', 'PAR']
data = data[data['proto'].isin(kept_protocols)]
data = data[data['state'].isin(kept_states)]

# The 'label' column is kept as a one-column DataFrame.
data_labels = data[['label']]

# Only these thirteen columns are used as model inputs; all other columns are left out.
feature_columns = ['proto', 'srcip', 'sport', 'dstip', 'dsport', 'spkts', 'dpkts',
                   'sbytes', 'dbytes', 'state', 'stime', 'ltime', 'dur']
data_features = data[feature_columns]

"""PREPROCESSING"""

def _ip_tail_as_hex(address):
    """Parse the text after the last '.' and the last ':' of an address as a base-16 integer."""
    return int(address.split(".")[-1].split(":")[-1], 16)


def _port_as_hex(raw_port, dash_is_missing=False):
    """Convert one raw port value to an int; purely alphabetic text becomes -1."""
    cleaned = raw_port.replace('0x', '') if "0x" in str(raw_port) else raw_port
    cleaned = str(cleaned)
    # Ports that were read as floats carry a trailing '.0'.
    if cleaned[-2:] == '.0':
        cleaned = cleaned[:-2]
    if dash_is_missing and cleaned == "-":
        cleaned = "None"
    return -1 if cleaned.isalpha() else int(cleaned, 16)


# Source and destination addresses are each reduced to a single integer.
for ip_column in ('srcip', 'dstip'):
    data_features[ip_column] = data_features[ip_column].apply(_ip_tail_as_hex)

# Only the destination port maps '-' to the missing marker.
data_features['sport'] = data_features['sport'].apply(lambda port: _port_as_hex(port))
data_features['dsport'] = data_features['dsport'].apply(
    lambda port: _port_as_hex(port, dash_is_missing=True))

# Force the address and port columns to the platform integer type.
for int_column in ('srcip', 'sport', 'dstip', 'dsport'):
    data_features[int_column] = data_features[int_column].astype(int)

# Compress the heavy-tailed columns with log(1 + x).
log1p_col = ['dur', 'sbytes', 'dbytes', 'spkts']
for column in log1p_col:
    data_features[column] = np.log1p(data_features[column])

# Run the labels through get_dummies as well, which yields a new frame.
data_labels = pd.get_dummies(data_labels)

# One-hot encode the remaining categorical feature columns.
data_features = pd.get_dummies(data_features)

# Standardise every column with a scaler fitted on this frame.
data_features = StandardScaler().fit_transform(data_features)

# Append a trailing axis of length 1 to the scaled feature matrix.
data_features = np.expand_dims(data_features, -1)

x_test2 = data_features
y_test2 = data_labels

In [None]:
print(x_test2.shape)
print(y_test2.shape)
print(y_test2.value_counts())

In [None]:
# Early stopping and best-model checkpointing for this experiment.
import os

# A leading '~' is only expanded by the shell or by os.path.expanduser; passed through
# unchanged it would be treated as a literal directory named '~' under the working
# directory, so resolve the home directory explicitly.
filepath = os.path.expanduser('~/project_dl/finale/tmp/checkpoint.model.keras')
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss',  # watch the validation loss
        patience=5,  # stop after 5 epochs without improvement
    ),
    keras.callbacks.ModelCheckpoint(
        filepath=filepath,  # file where the checkpoint is saved
        monitor='val_loss',
        save_best_only=True,  # overwrite the file only when val_loss improves
    ),
]

In [None]:
# Start from a clean Keras session with GPU memory growth enabled.
clear_session()

# Use mixed precision for everything built from here on.
mixed_precision.set_global_policy('mixed_float16')

# Mirror across GPUs when at least one is visible; otherwise use the default strategy.
strategy = (
    tf.distribute.MirroredStrategy()
    if tf.config.list_physical_devices('GPU')
    else tf.distribute.get_strategy()
)
print("Number of devices: {}".format(strategy.num_replicas_in_sync))

# The model is created inside the strategy scope.
with strategy.scope():
    model1 = model_cnn()

# The test split is passed as validation data, so val_* metrics refer to the test set.
history = model1.fit(
    x_train2,
    y_train2,
    batch_size=2048,
    epochs=50,
    validation_data=(x_test2, y_test2),
    callbacks=callbacks,
)
history2 = history

In [None]:
#Evaluate the model
# 174/174 [==============] - 12s 68ms/step - loss: 0.0234 - accuracy: 0.9919 - val_loss: 0.2111 - val_accuracy: 0.9466, 5e-4, epoch 20
# 174/174 [==============] - 12s 67ms/step - loss: 0.0263 - accuracy: 0.9914 - val_loss: 0.2188 - val_accuracy: 0.9380, 1e-4, epoch 22
# 174/174 [==============] - 12s 68ms/step - loss: 0.0270 - accuracy: 0.9911 - val_loss: 0.1964 - val_accuracy: 0.9469, 6e-5, epoch 30
# 174/174 [==============] - 12s 68ms/step - loss: 0.0306 - accuracy: 0.9901 - val_loss: 0.2595 - val_accuracy: 0.9177, 4e-5, epoch 23
# 174/174 [==============] - 12s 66ms/step - loss: 0.0343 - accuracy: 0.9889 - val_loss: 0.2713 - val_accuracy: 0.9055, 2e-5, epoch 30

# 348/348 [==============] - 14s 40ms/step - loss: 0.0278 - accuracy: 0.9908 - val_loss: 0.2063 - val_accuracy: 0.9369, 4e-5
# 87/87 [================] - 16s 187ms/step - loss: 0.0292 - accuracy: 0.9908 - val_loss: 0.1723 - val_accuracy: 0.9559, 6e-5
# 87/87 [================] - 16s 188ms/step - loss: 0.0277 - accuracy: 0.9912 - val_loss: 0.1570 - val_accuracy: 0.9678
# 87/87 [================] - 16s 186ms/step - loss: 0.0283 - accuracy: 0.9909 - val_loss: 0.1604 - val_accuracy: 0.9637, 6.5e-5
# 87/87 [================] - 16s 188ms/step - loss: 0.0266 - accuracy: 0.9915 - val_loss: 0.1595 - val_accuracy: 0.9667
# 87/87 [================] - 16s 190ms/step - loss: 0.0278 - accuracy: 0.9912 - val_loss: 0.1502 - val_accuracy: 0.9669, 7e-5
# 87/87 [================] - 16s 184ms/step - loss: 0.0270 - accuracy: 0.9914 - val_loss: 0.1623 - val_accuracy: 0.9654
# 87/87 [================] - 16s 186ms/step - loss: 0.0279 - accuracy: 0.9910 - val_loss: 0.1629 - val_accuracy: 0.9679, 7.5e-5
# 87/87 [================] - 16s 184ms/step - loss: 0.0276 - accuracy: 0.9911 - val_loss: 0.1593 - val_accuracy: 0.9608, 8e-5

# test loss, test acc: [0.15425102412700653, 0.9671387076377869]

results2 = model1.evaluate(x_test2, y_test2, verbose=1)
print("test loss, test acc:", results2)

In [None]:
# Plot the training and validation loss recorded for each epoch of the latest fit.
train_loss = history.history['loss']
test_loss = history.history['val_loss']
x = [epoch + 1 for epoch in range(len(test_loss))]
ax = plt.gca()
ax.plot(x, test_loss, color='orange', label='Test loss')
ax.plot(x, train_loss, label='Training loss')
ax.legend()
ax.grid(visible=True)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Loss vs. Epoch', weight='bold', fontsize=18)
plt.show()

In [None]:
# Plot the training and validation accuracy recorded for each epoch of the latest fit.
train_acc = history.history['accuracy']
test_acc = history.history['val_accuracy']
x = [epoch + 1 for epoch in range(len(test_acc))]
ax = plt.gca()
ax.plot(x, test_acc, color='orange', label='Test accuracy')
ax.plot(x, train_acc, label='Training accuracy')
ax.legend()
ax.grid(visible=True)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy vs. Epoch', weight='bold', fontsize=18)
plt.show()

In [None]:
# Prediction
predicted2 = model1.predict(x_test2)

In [None]:
# Work on an explicit copy: pd.DataFrame(frame) is not guaranteed to be independent of
# the frame it wraps, and y_test2 is still needed unchanged by the metric cells below.
df2 = y_test2.copy()
# model.predict returns a 2-D array; flatten it so it is stored as one plain column.
df2['predicted'] = np.ravel(predicted2)

In [None]:
cf2 = confusion_matrix(y_test2, np.round(predicted2))

In [None]:
# confusion_matrix puts the true classes on the rows and the predicted classes on the
# columns, ordered by label value (0 first). This notebook treats label 1 as an attack
# (the 'attack' column is copied from 'label'), so class 0 is 'Normal'; the original
# listed the two names in the opposite order.
class_names = ['Normal', 'Attack']
cmatrix_df2 = pd.DataFrame(cf2, index=class_names, columns=class_names)
ax = sns.heatmap(cmatrix_df2, annot=True, fmt="d", cbar=False, cmap='Blues')
# Title and axis labels are applied after the heatmap is drawn because sns.heatmap
# sets the axis labels itself. Rows are the true classes, so the y axis carries
# 'Attacks' and the x axis 'Predicted Attacks'.
ax.set_title('Confusion matrix of the base 5+ test with TL', weight='bold', fontsize=14)
ax.tick_params(length=0)
ax.set_xlabel('Predicted Attacks')
ax.set_ylabel('Attacks')
plt.show()

In [None]:
# Precision, Recall & F1 Score
class_report2 = classification_report(y_test2, np.round(predicted2))
print(class_report2)

In [None]:
# Draw the per-class scores of the classification report as a heatmap.
# target_names follow the sorted label values. This notebook treats label 1 as an
# attack, so the order is Normal (0) then Attack (1); the original had them reversed,
# which attached each class's scores to the wrong name.
class_report2 = classification_report(
    y_test2,
    np.round(predicted2),
    target_names=['Normal', 'Attack'],
    output_dict=True,
)
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom=False, bottom=False, top=False, labeltop=True)
plt.tick_params(length=0)
# Drop the last row and the last three columns of the report, then put the classes on the rows.
sns.heatmap(pd.DataFrame(class_report2).iloc[:-1, :-3].T, square=True, cbar=False, cmap='Blues', annot=True)
plt.show()

In [None]:
# Add a 'normal' indicator next to 'label': 1 where label is 0, otherwise 0.
# This replaces the three-step replace() swap and, unlike DataFrame.insert, does not
# raise when the cell is executed a second time.
# NOTE: data_labels is the same object that was bound to y_test2 earlier, so the new
# column also shows up there.
normal = (data_labels['label'] == 0).astype(int)
data_labels['normal'] = normal

In [None]:
# Rounded predictions, indexed like the filtered test frame so later column
# assignments align row by row.
df2 = pd.DataFrame(np.round(predicted2), index=data.index, columns=['pred_attack'])
df2['pred_normal'] = 1 - df2['pred_attack']
# Ground truth: the label, the attack category and the inverse-label indicator.
df2['attack'], df2['category'] = data['label'], data['attack_cat']
df2['normal'] = data_labels['normal']
df2.head()

In [None]:
df2.value_counts('category')

In [None]:
dff2=df2
df2=pd.get_dummies(df2)
df2=round(df2)

In [None]:
_EVAL_COLUMNS = ['pred_attack', 'pred_normal', 'attack', 'normal']
_PRED_COLUMNS = ['pred_attack', 'pred_normal']
_TRUTH_COLUMNS = ['attack', 'normal']


def _rows_of(category_flag):
    """Return the evaluation columns of the df2 rows whose one-hot flag equals 1."""
    return df2.loc[df2[category_flag] == 1][_EVAL_COLUMNS]


# One frame per traffic category, plus its prediction and ground-truth halves.
normal = _rows_of('category_normal')
normal_pred, normal_check = normal[_PRED_COLUMNS], normal[_TRUTH_COLUMNS]

exploits = _rows_of('category_exploits')
exploits_pred, exploits_check = exploits[_PRED_COLUMNS], exploits[_TRUTH_COLUMNS]

fuzzers = _rows_of('category_fuzzers')
fuzzers_pred, fuzzers_check = fuzzers[_PRED_COLUMNS], fuzzers[_TRUTH_COLUMNS]

analysis = _rows_of('category_analysis')
analysis_pred, analysis_check = analysis[_PRED_COLUMNS], analysis[_TRUTH_COLUMNS]

backdoor = _rows_of('category_backdoor')
backdoor_pred, backdoor_check = backdoor[_PRED_COLUMNS], backdoor[_TRUTH_COLUMNS]

shellcode = _rows_of('category_shellcode')
shellcode_pred, shellcode_check = shellcode[_PRED_COLUMNS], shellcode[_TRUTH_COLUMNS]

worms = _rows_of('category_worms')
worms_pred, worms_check = worms[_PRED_COLUMNS], worms[_TRUTH_COLUMNS]

In [None]:
# Per-row hit indicator: the product is non-zero only when prediction and truth agree
# on the class that the category belongs to.
countdata = pd.DataFrame()
normal_c = normal['pred_normal'].mul(normal['normal'])
exploits_c = exploits['pred_attack'].mul(exploits['attack'])
fuzzers_c = fuzzers['pred_attack'].mul(fuzzers['attack'])
analysis_c = analysis['pred_attack'].mul(analysis['attack'])
backdoor_c = backdoor['pred_attack'].mul(backdoor['attack'])
shellcode_c = shellcode['pred_attack'].mul(shellcode['attack'])
worms_c = worms['pred_attack'].mul(worms['attack'])

In [None]:
# For every category: *_c0 counts the zero products (missed), *_c1 is the remainder.
normal_c0 = normal_c.eq(0).sum()
normal_c1 = len(normal) - normal_c0

exploits_c0 = exploits_c.eq(0).sum()
exploits_c1 = len(exploits) - exploits_c0

fuzzers_c0 = fuzzers_c.eq(0).sum()
fuzzers_c1 = len(fuzzers) - fuzzers_c0

analysis_c0 = analysis_c.eq(0).sum()
analysis_c1 = len(analysis) - analysis_c0

backdoor_c0 = backdoor_c.eq(0).sum()
backdoor_c1 = len(backdoor) - backdoor_c0

shellcode_c0 = shellcode_c.eq(0).sum()
shellcode_c1 = len(shellcode) - shellcode_c0

worms_c0 = worms_c.eq(0).sum()
worms_c1 = len(worms) - worms_c0

In [None]:
normal_0, normal_1 = normal_c0*100 / len(normal), normal_c1*100 / len(normal)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(normal_0, normal_1))

In [None]:
analysis_0, analysis_1 = analysis_c0*100 / len(analysis), analysis_c1*100 / len(analysis)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(analysis_0, analysis_1))

In [None]:
backdoor_0, backdoor_1 = backdoor_c0*100 / len(backdoor), backdoor_c1*100 / len(backdoor)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(backdoor_0, backdoor_1))

In [None]:
shellcode_0, shellcode_1 = shellcode_c0*100 / len(shellcode), shellcode_c1*100 / len(shellcode)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(shellcode_0, shellcode_1))

In [None]:
worms_0, worms_1 = worms_c0*100 / len(worms), worms_c1*100 / len(worms)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(worms_0, worms_1))

In [None]:
exploits_0, exploits_1 = exploits_c0*100 / len(exploits), exploits_c1*100 / len(exploits)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(exploits_0, exploits_1))

In [None]:
fuzzers_0, fuzzers_1 = fuzzers_c0*100 / len(fuzzers), fuzzers_c1*100 / len(fuzzers)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(fuzzers_0, fuzzers_1))

In [None]:
# One row per traffic category: detection rates followed by the raw sample counts.
summary_rows2 = [
    ("Normal", normal_1, normal_0, normal_c1, normal_c0),
    ("Exploits", exploits_1, exploits_0, exploits_c1, exploits_c0),
    ("Fuzzers", fuzzers_1, fuzzers_0, fuzzers_c1, fuzzers_c0),
    ("Analysis", analysis_1, analysis_0, analysis_c1, analysis_c0),
    ("Backdoor", backdoor_1, backdoor_0, backdoor_c1, backdoor_c0),
    ("Shellcode", shellcode_1, shellcode_0, shellcode_c1, shellcode_c0),
    ("Worms", worms_1, worms_0, worms_c1, worms_c0),
]
summary2 = pd.DataFrame(
    summary_rows2,
    columns=["Traffic", "Detected %", "No Detected %", "Detected Samples", "No Detected Samples"],
)

In [None]:
dff2['category_pred'] = np.where(((df2['attack'] == 1) & (df2['pred_attack'] == 1)) | ((df2['normal'] == 1) & (df2['pred_normal'] == 1)), 'Detected', 'No Detected')

In [None]:
data_plot2=dff2[['category', 'category_pred']]

In [None]:
data_plot2

In [None]:
# Bar chart of detected versus undetected samples for every traffic category.
plt.figure(figsize=(15, 5))
sns.set_theme(style="darkgrid")
sns.countplot(
    data=data_plot2,
    x=data_plot2['category'],
    hue=data_plot2['category_pred'],
    palette='CMRmap',
)
plt.title('Deteksi Serangan pada First Test dengan TL', weight='bold', fontsize='18')
plt.xticks(weight='bold', fontsize=12)
plt.show()

In [None]:
summary2

In [None]:
summary2.to_csv('~/04_Semester_4/revisi/csv_files/summary_base5+_test_tl.csv', index=False)

In [None]:
model1.save('model/base5_test_plus_tl.keras')

In [None]:
#model.save('/home/riki/project_dl/finale/model/base5+_cnn1d_tl.h5')

### C. Base5 Train & Full Test

In [None]:
# Section C trains on the same objects as section B: these are plain aliases,
# no copy is made.
x_train3, y_train3 = x_train2, y_train2

In [None]:
# Load the full test CSV; low_memory=False makes pandas read the file in one
# pass instead of chunk-wise dtype inference.
data = pd.read_csv('~/04_Semester_4/revisi/csv_files/UNSW_NB15_full_test.csv', low_memory=False)

In [None]:
# Build x_test3 / y_test3 from the full test CSV.
# The steps mirror the preprocessing applied to the training CSV at the top of
# the notebook: filter rows, parse IPs/ports, log-scale, one-hot, standardize.

# Keep only the protocols and connection states of interest.
_PROTOCOLS = ['tcp', 'udp', 'icmp', 'arp', 'ipv6-icmp', 'igmp', 'rarp']
_STATES = ['RST', 'REQ', 'INT', 'FIN', 'CON', 'ECO', 'ACC', 'PAR']
data = data.loc[data['proto'].isin(_PROTOCOLS), :]
data = data.loc[data['state'].isin(_STATES), :]

# Extracting dataset labels
data_labels = data[['label']]

# Select the feature columns. The explicit copy makes the column assignments
# below operate on an independent frame rather than on a slice of `data`.
data_features = data[['proto','srcip','sport','dstip','dsport','spkts','dpkts','sbytes','dbytes','state','stime','ltime','dur']].copy()

# --- PREPROCESSING ---

# IP addresses: keep the last '.'-separated group, then the last ':'-separated
# group of that, and parse the remainder as a base-16 integer.
for ip_col in ('srcip', 'dstip'):
    data_features[ip_col] = data_features[ip_col].apply(
        lambda addr: int(addr.split(".")[-1].split(":")[-1], 16)
    )

# Ports: strip a '0x' marker where present.
data_features['sport'] = data_features['sport'].apply(lambda x: x.replace('0x','') if "0x" in str(x) else x)
data_features['dsport'] = data_features['dsport'].apply(lambda x: x.replace('0x','') if "0x" in str(x) else x)

# Drop a trailing '.0', then parse as base-16; purely alphabetic values map to -1.
data_features['sport'] = data_features['sport'].apply(lambda x: str(x)[:-2] if str(x)[-2:] == '.0' else str(x))
data_features['sport'] = data_features['sport'].apply(lambda x: -1 if str(x).isalpha() else int(x,16))

data_features['dsport'] = data_features['dsport'].apply(lambda x: str(x)[:-2] if str(x)[-2:] == '.0' else str(x))
# '-' is rewritten to the alphabetic token "None" so the next step maps it to -1.
data_features['dsport'] = data_features['dsport'].apply(lambda x: "None" if x=="-" else x)
data_features['dsport'] = data_features['dsport'].apply(lambda x: -1 if str(x).isalpha() else int(x,16))

# Convert the parsed address/port fields to int format
for int_col in ('srcip', 'sport', 'dstip', 'dsport'):
    data_features[int_col] = data_features[int_col].astype(int)

# Convert some fields to logarithmic scale, log(1 + x)
log1p_col = ['dur', 'sbytes', 'dbytes', 'spkts']

for col in log1p_col:
    data_features[col] = data_features[col].apply(np.log1p)

# One-hot encode any object/category columns in the labels
data_labels = pd.get_dummies(data_labels)

# Transform to One hot encoding - FEATURES
# NOTE(review): the resulting column set depends on the values present in this
# file; confirm it matches the columns the model was trained on.
data_features = pd.get_dummies(data_features)

# Normalize all data features
# NOTE(review): a new scaler is fitted on this test set here; confirm this is
# intended rather than reusing the scaler fitted on the training data.
data_features = StandardScaler().fit_transform(data_features)

# Add a trailing axis: (rows, features) -> (rows, features, 1)
data_features = np.expand_dims(data_features, axis=2)

# Note: y_test3 is the same object as data_labels (no copy).
x_test3, y_test3 = data_features, data_labels

In [None]:
# Sanity check: array/frame shapes and the distribution of label rows.
print(x_test3.shape)
print(y_test3.shape)
print(y_test3.value_counts())

In [None]:
# Reset state via clear_session() (defined/imported elsewhere in this file)
# before building a new model.
clear_session()

# Global mixed-precision policy for the layers created below.
mixed_precision.set_global_policy('mixed_float16')

# Mirror across GPUs when at least one is visible, otherwise fall back to the
# default (no-op) strategy.
strategy = (
    tf.distribute.MirroredStrategy()
    if tf.config.list_physical_devices('GPU')
    else tf.distribute.get_strategy()
)

print("Number of devices: {}".format(strategy.num_replicas_in_sync))

# Variables must be created inside the strategy scope.
with strategy.scope():
    model = model_cnn()

# Train the model on all available devices.
# NOTE(review): x_test3 / y_test3 are passed as validation data, so whatever
# `callbacks` monitors sees the same set that is evaluated afterwards.
history3 = model.fit(
    x_train3,
    y_train3,
    validation_data=(x_test3, y_test3),
    epochs=50,
    batch_size=2048,
    callbacks=callbacks,
)
history = history3

In [None]:
# Evaluate `model` on the full test set. The lines below are training logs
# pasted from earlier runs (the trailing value is presumably the learning rate
# used for that run - TODO confirm).
# 174/174 [=========] - 20s 116ms/step - loss: 0.0272 - accuracy: 0.9911 - val_loss: 0.0536 - val_accuracy: 0.9886. 6e-5
# 174/174 [=========] - 20s 114ms/step - loss: 0.0341 - accuracy: 0.9888 - val_loss: 0.0624 - val_accuracy: 0.9823. 2e-5
# 87/87 [===========] 18s 213ms/step - loss: 0.0295 - accuracy: 0.9904 - val_loss: 0.0564 - val_accuracy: 0.9862, 6e-5
results4 = model.evaluate(x_test3, y_test3, verbose=1)
# Output of an earlier run:
#test loss, test acc: [0.07396313548088074, 0.9728277325630188]

print("test loss, test acc:", results4)

In [None]:
# Evaluate `model1` (the model saved as 'model/base5_test_plus_tl.keras') on
# the same full test set for comparison. Outputs of earlier runs follow.
results3 = model1.evaluate(x_test3, y_test3, verbose=1)
#test loss, test acc: [0.07396313548088074, 0.9728277325630188]
# 18829/18829 [==============================] - 114s 6ms/step - loss: 0.0509 - accuracy: 0.9882, 6e-5
# 18829/18829 [==============================] - 105s 6ms/step - loss: 0.0532 - accuracy: 0.9872


In [None]:
# Loss per epoch: training loss vs. loss on the validation data given to fit().
train_loss, test_loss = (history.history[key] for key in ('loss', 'val_loss'))
x = [epoch + 1 for epoch in range(len(test_loss))]  # 1-based epoch numbers
plt.plot(x, test_loss, color = 'orange', label = 'Test loss')
plt.plot(x, train_loss, label = 'Training loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epoch', weight='bold', fontsize=18)
plt.grid(visible=True)
plt.legend()
plt.show()

In [None]:
# Accuracy per epoch: training accuracy vs. accuracy on the validation data
# given to fit().
train_acc, test_acc = (history.history[key] for key in ('accuracy', 'val_accuracy'))
x = [epoch + 1 for epoch in range(len(test_acc))]  # 1-based epoch numbers
plt.plot(x, test_acc, color = 'orange', label = 'Test accuracy')
plt.plot(x, train_acc, label = 'Training accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Epoch', weight='bold', fontsize=18)
plt.grid(visible=True)
plt.legend()
plt.show()

# IMPORTANT: from here on `model` is no longer the network trained above but
# the model previously saved to this path; the predictions that follow use it.
model = load_model('model/base5_test_plus_tl.keras')

In [None]:
# Prediction
predicted3 = model.predict(x_test3)

In [None]:
df3 = pd.DataFrame(y_test3)
df3['predicted'] = predicted3

In [None]:
cf3 = confusion_matrix(y_test3, np.round(predicted3))

In [None]:
# confusion_matrix orders classes by sorted label value, so row/column 0 is
# label 0 and row/column 1 is label 1. This notebook treats label 1 as
# "attack" (attack = data['label'], pred_attack = rounded prediction), so
# position 0 is Normal and position 1 is Attack.
cmatrix_df3 = pd.DataFrame(cf3, index=['Normal', 'Attack'], columns=['Normal', 'Attack'])
plt.title('Confusion matrix dari Full test', weight='bold', fontsize=14)
plt.tick_params(length=0)
sns.heatmap(cmatrix_df3, annot=True, fmt="d", cbar=False, cmap='Blues')
# Axis labels are set after drawing the heatmap so they end up on the final
# axes. Rows of the matrix are true classes, columns are predicted classes.
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Precision, Recall & F1 Score
class_report3 = classification_report(y_test3, np.round(predicted3))
print(class_report3)

In [None]:
# Precision, Recall & F1-Score
class_report3 = classification_report(y_test3, np.round(predicted3), target_names = ['Attack', 'Normal'],output_dict=True) 
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom = False, bottom=False, top = False, labeltop=True)
plt.tick_params(length=0)
sns.heatmap(pd.DataFrame(class_report3).iloc[:-1, :-3].T, square=True, cbar=False,cmap='Blues', annot=True) 
plt.show()

In [None]:
class_report3

In [None]:
# Add a 'normal' column that is the complement of 'label'
# (label 1 -> normal 0, label 0 -> normal 1).
# Assumes 'label' only holds 0 and 1, as the rest of the notebook does.
# Plain column assignment is used instead of DataFrame.insert so the cell can
# be re-run without raising because the column already exists.
normal = 1 - data_labels['label']
data_labels['normal'] = normal

In [None]:
# Per-row comparison frame, indexed like the test labels:
#   pred_attack / pred_normal - rounded prediction and its complement
#   attack / normal           - ground truth and its complement
#   category                  - attack category from the raw data
df3 = pd.DataFrame(
    np.round(predicted3),
    index=y_test3.index,
    columns=['pred_attack'],
)
df3['pred_normal'] = 1 - df3['pred_attack']
df3['attack'] = data['label']
df3['category'] = data['attack_cat']
df3['normal'] = data_labels['normal']
df3.head()

In [None]:
df3.value_counts('category')

In [None]:
dff3=df3
df3=pd.get_dummies(df3)
df3=round(df3)

In [None]:
# For every traffic category, pull out its rows with the four verdict columns,
# then split them into a prediction pair (*_pred) and a ground-truth pair
# (*_check).
_VERDICT_COLS = ['pred_attack', 'pred_normal', 'attack', 'normal']

analysis = df3.loc[df3['category_analysis'] == 1, _VERDICT_COLS]
analysis_pred = analysis[['pred_attack', 'pred_normal']]
analysis_check = analysis[['attack', 'normal']]

backdoor = df3.loc[df3['category_backdoor'] == 1, _VERDICT_COLS]
backdoor_pred = backdoor[['pred_attack', 'pred_normal']]
backdoor_check = backdoor[['attack', 'normal']]

fuzzers = df3.loc[df3['category_fuzzers'] == 1, _VERDICT_COLS]
fuzzers_pred = fuzzers[['pred_attack', 'pred_normal']]
fuzzers_check = fuzzers[['attack', 'normal']]

# Note: this rebinds `normal` (previously the complement-of-label Series).
normal = df3.loc[df3['category_normal'] == 1, _VERDICT_COLS]
normal_pred = normal[['pred_attack', 'pred_normal']]
normal_check = normal[['attack', 'normal']]

shellcode = df3.loc[df3['category_shellcode'] == 1, _VERDICT_COLS]
shellcode_pred = shellcode[['pred_attack', 'pred_normal']]
shellcode_check = shellcode[['attack', 'normal']]

worms = df3.loc[df3['category_worms'] == 1, _VERDICT_COLS]
worms_pred = worms[['pred_attack', 'pred_normal']]
worms_check = worms[['attack', 'normal']]

generic = df3.loc[df3['category_generic'] == 1, _VERDICT_COLS]
generic_pred = generic[['pred_attack', 'pred_normal']]
generic_check = generic[['attack', 'normal']]

exploits = df3.loc[df3['category_exploits'] == 1, _VERDICT_COLS]
exploits_pred = exploits[['pred_attack', 'pred_normal']]
exploits_check = exploits[['attack', 'normal']]

reconnaissance = df3.loc[df3['category_reconnaissance'] == 1, _VERDICT_COLS]
reconnaissance_pred = reconnaissance[['pred_attack', 'pred_normal']]
reconnaissance_check = reconnaissance[['attack', 'normal']]

dos = df3.loc[df3['category_dos'] == 1, _VERDICT_COLS]
dos_pred = dos[['pred_attack', 'pred_normal']]
dos_check = dos[['attack', 'normal']]

In [None]:
# Per-row product of prediction and ground truth: non-zero only where the
# prediction and the truth are both non-zero, i.e. the row was classified
# correctly. Normal traffic uses the *_normal pair, attack categories the
# *_attack pair.
countdata = pd.DataFrame()
normal_c = normal['pred_normal'].mul(normal['normal'])
analysis_c = analysis['pred_attack'].mul(analysis['attack'])
backdoor_c = backdoor['pred_attack'].mul(backdoor['attack'])
fuzzers_c = fuzzers['pred_attack'].mul(fuzzers['attack'])
shellcode_c = shellcode['pred_attack'].mul(shellcode['attack'])
worms_c = worms['pred_attack'].mul(worms['attack'])
generic_c = generic['pred_attack'].mul(generic['attack'])
exploits_c = exploits['pred_attack'].mul(exploits['attack'])
reconnaissance_c = reconnaissance['pred_attack'].mul(reconnaissance['attack'])
dos_c = dos['pred_attack'].mul(dos['attack'])

In [None]:
# For each category: *_c0 = rows whose product is 0 (not detected),
# *_c1 = all remaining rows of that category (detected).
normal_c0 = normal_c.eq(0).sum()
normal_c1 = len(normal) - normal_c0

analysis_c0 = analysis_c.eq(0).sum()
analysis_c1 = len(analysis) - analysis_c0

backdoor_c0 = backdoor_c.eq(0).sum()
backdoor_c1 = len(backdoor) - backdoor_c0

fuzzers_c0 = fuzzers_c.eq(0).sum()
fuzzers_c1 = len(fuzzers) - fuzzers_c0

shellcode_c0 = shellcode_c.eq(0).sum()
shellcode_c1 = len(shellcode) - shellcode_c0

worms_c0 = worms_c.eq(0).sum()
worms_c1 = len(worms) - worms_c0

generic_c0 = generic_c.eq(0).sum()
generic_c1 = len(generic) - generic_c0

exploits_c0 = exploits_c.eq(0).sum()
exploits_c1 = len(exploits) - exploits_c0

reconnaissance_c0 = reconnaissance_c.eq(0).sum()
reconnaissance_c1 = len(reconnaissance) - reconnaissance_c0

dos_c0 = dos_c.eq(0).sum()
dos_c1 = len(dos) - dos_c0

In [None]:
normal_0, normal_1 = normal_c0*100 / len(normal), normal_c1*100 / len(normal)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(normal_0, normal_1))

In [None]:
analysis_0, analysis_1 = analysis_c0*100 / len(analysis), analysis_c1*100 / len(analysis)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(analysis_0, analysis_1))

In [None]:
backdoor_0, backdoor_1 = backdoor_c0*100 / len(backdoor), backdoor_c1*100 / len(backdoor)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(backdoor_0, backdoor_1))

In [None]:
fuzzers_0, fuzzers_1 = fuzzers_c0*100 / len(fuzzers), fuzzers_c1*100 / len(fuzzers)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(fuzzers_0, fuzzers_1))


In [None]:
shellcode_0, shellcode_1 = shellcode_c0*100 / len(shellcode), shellcode_c1*100 / len(shellcode)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(shellcode_0, shellcode_1))


In [None]:
worms_0, worms_1 = worms_c0*100 / len(worms), worms_c1*100 / len(worms)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(worms_0, worms_1))

In [None]:
generic_0, generic_1 = generic_c0*100 / len(generic), generic_c1*100 / len(generic)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(generic_0, generic_1))


In [None]:
exploits_0, exploits_1 = exploits_c0*100 / len(exploits), exploits_c1*100 / len(exploits)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(exploits_0, exploits_1))

In [None]:
reconnaissance_0, reconnaissance_1 = reconnaissance_c0*100 / len(reconnaissance), reconnaissance_c1*100 / len(reconnaissance)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(reconnaissance_0, reconnaissance_1))

In [None]:
dos_0, dos_1 = dos_c0*100 / len(dos), dos_c1*100 / len(dos)
print("There are {:.2f} % of NO detected and {:.2f} % of detected samples".format(dos_0, dos_1))

In [None]:
# One row per traffic category; every list below follows the same category
# order as the "Traffic" column.
summary3 = pd.DataFrame({
    "Traffic": [
        "Normal", "Analysis", "Backdoor", "Fuzzers", "Shellcode",
        "Worms", "Generic", "Exploits", "Reconnaissance", "DoS",
    ],
    "Detected %": [
        normal_1, analysis_1, backdoor_1, fuzzers_1, shellcode_1,
        worms_1, generic_1, exploits_1, reconnaissance_1, dos_1,
    ],
    "No Detected %": [
        normal_0, analysis_0, backdoor_0, fuzzers_0, shellcode_0,
        worms_0, generic_0, exploits_0, reconnaissance_0, dos_0,
    ],
    "Detected Samples": [
        normal_c1, analysis_c1, backdoor_c1, fuzzers_c1, shellcode_c1,
        worms_c1, generic_c1, exploits_c1, reconnaissance_c1, dos_c1,
    ],
    "No Detected Samples": [
        normal_c0, analysis_c0, backdoor_c0, fuzzers_c0, shellcode_c0,
        worms_c0, generic_c0, exploits_c0, reconnaissance_c0, dos_c0,
    ],
})

In [None]:
dff3['category_pred'] = np.where(((df3['attack'] == 1) & (df3['pred_attack'] == 1)) | ((df3['normal'] == 1) & (df3['pred_normal'] == 1)), 'Detected', 'No Detected')

In [None]:
data_plot3=dff3[['category', 'category_pred']]

In [None]:
data_plot3

In [None]:
# Rows per category, split by Detected / No Detected verdict.
sns.set_theme(style="darkgrid")
plt.figure(figsize=(15,5))
sns.countplot(
    data=data_plot3,
    x='category',
    hue='category_pred',
)
plt.title('Deteksi Serangan di Full Test Dataset', weight='bold', fontsize='18')
plt.xticks(weight='bold', fontsize=12)
plt.show()

In [None]:
# Display the per-category detection summary for the full test set.
summary3

In [None]:
# Persist the summary (without the index column); the same file is read back
# below as `summary3_tl` for the improvement table.
summary3.to_csv('~/04_Semester_4/revisi/csv_files/summary_base_test_with_tl.csv', index=False)

In [None]:
summary2_tl = pd.read_csv('~/04_Semester_4/revisi/csv_files/summary_base5+_test_tl.csv')

In [None]:
improve = pd.read_csv('~/04_Semester_4/revisi/csv_files/summary_first_test.csv')

In [None]:
improve['Detected %'] = summary2_tl['Detected %'] - improve['Detected %']

In [None]:
improve['Detected Samples'] = summary2_tl['Detected Samples'] - improve['Detected Samples']

In [None]:
del improve['No Detected %']

In [None]:
del improve['No Detected Samples']

In [None]:
improve.rename(columns={'Detected %':'Detection Improvement %',
                        'Detected Samples' : 'Detection Improvement'}, inplace=True)

In [None]:
improve

In [None]:
summary3_tl = pd.read_csv('~/04_Semester_4/revisi/csv_files/summary_base_test_with_tl.csv')

In [None]:
improve =pd.read_csv('~/04_Semester_4/revisi/csv_files/summary_full_test.csv')

In [None]:
improve['Detected %'] = summary3_tl['Detected %'] - improve['Detected %']

In [None]:
improve['Detected Samples'] = summary3_tl['Detected Samples'] - improve['Detected Samples']

In [None]:
del improve['No Detected %']

In [None]:
del improve['No Detected Samples']

In [None]:
improve.rename(columns={'Detected %':'Detection Improvement',
                        'Detected Samples %' : 'Detection Improvement'}, inplace=True)

In [None]:
improve