# Data Imports/Pre-Processing:

In [None]:
# Importing KYOTO-2006 data.
!pip install --upgrade --no-cache-dir gdown
!gdown https://drive.google.com/uc?id=1-2p3ktXN6k23PFPL9w82psoRlgDBfU_N

# Converting the csv file to dataframe.
import pandas as pd
kyoto = pd.read_csv('KYOTO_features.csv')
kyoto = kyoto.drop(labels = 'Unnamed: 0', axis = 1)

In [None]:
# Importing UNSW-NB15 data.
!gdown https://drive.google.com/uc?id=1-2GzNzitcNAkS8o3AjGt5s7ObExmc9wZ

# Converting the csv file to dataframe.
unsw = pd.read_csv('UNSW_features.csv')
unsw = unsw.drop(labels = 'Unnamed: 0', axis = 1)

In [None]:
# Converting the 'sport' and 'dsport' columns of UNSW-NB15 to numerical columns.

def _is_integer_port(value):
  """Return True if `value` is a Python int or a string of decimal digits.

  `str.isdecimal` is used instead of `str.isnumeric` because the latter also
  accepts characters such as superscripts and fractions that `int()` rejects,
  which would make the later `astype(int)` fail.
  """
  return type(value) == int or (type(value) == str and value.isdecimal())


def _keep_integer_ports(frame, column):
  """Drop the rows whose `column` is not an integer port and cast the rest to int.

  Parameters:
    frame: DataFrame holding the port column.
    column: name of the port column to clean.

  Returns:
    A new DataFrame; `frame` itself is not modified.
  """
  mask = frame[column].apply(_is_integer_port)
  # .copy() gives the filtered rows their own data, so the column assignment
  # below is not a write into a slice of `frame` (SettingWithCopyWarning).
  kept = frame.loc[mask].copy()
  kept[column] = kept[column].astype(int)
  return kept


# Same order as before: the source port is cleaned first, then the destination port.
for port_column in ('sport', 'dsport'):
  unsw = _keep_integer_ports(unsw, port_column)

In [None]:
# Label-encode the categorical KYOTO-2006 columns: every distinct value in a
# column is replaced by an integer code.
from sklearn.preprocessing import LabelEncoder
categorical_features = ['Source_IP_addr', 'Dest_IP_addr', 'Protocol', 'service']
for feature in categorical_features:
  # A fresh encoder per column, so the codes of one column never depend on another.
  label_encoder = LabelEncoder()
  kyoto[feature] = label_encoder.fit_transform(kyoto[feature])

In [None]:
# Label-encode the categorical UNSW-NB15 columns: every distinct value in a
# column is replaced by an integer code.
categorical_features = ['dstip', 'service', 'proto', 'srcip']
for feature in categorical_features:
  # A fresh encoder per column, so the codes of one column never depend on another.
  label_encoder = LabelEncoder()
  unsw[feature] = label_encoder.fit_transform(unsw[feature])

In [None]:
# Keep only the UNSW-NB15 rows that have a non-null 'label'.
unsw = unsw[unsw['label'].notna()]

In [None]:
# Creating testing/training data for KYOTO-2006.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Target column and feature columns.
y = kyoto.Label
x = kyoto.drop(labels = ['Label'], axis = 1)

# Split BEFORE scaling. The fixed random_state keeps the row partition reproducible.
X_train_KYOTO, X_test_KYOTO, y_train_KYOTO, y_test_KYOTO = train_test_split(x, y, test_size=0.33, random_state=42)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows; fitting on the full data would leak test-set mean/variance into
# training and make the reported test accuracy optimistic.
scaler = StandardScaler()
X_train_KYOTO = scaler.fit_transform(X_train_KYOTO)
X_test_KYOTO = scaler.transform(X_test_KYOTO)

# Backward-compatible alias: the misspelled name is still referenced elsewhere
# in the notebook.
y_test_KOYTO = y_test_KYOTO

In [None]:
# Creating testing/training data for UNSW-NB15.
# Target column and feature columns.
y = unsw.label
x = unsw.drop(labels = ['label'], axis = 1)

# Split BEFORE scaling. The fixed random_state keeps the row partition reproducible.
X_train_UNSW, X_test_UNSW, y_train_UNSW, y_test_UNSW = train_test_split(x, y, test_size=0.33, random_state=42)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows; fitting on the full data would leak test-set mean/variance into
# training and make the reported test accuracy optimistic.
scaler = StandardScaler()
X_train_UNSW = scaler.fit_transform(X_train_UNSW)
X_test_UNSW = scaler.transform(X_test_UNSW)

# Final Convolutional Neural Network:

In [None]:
import tensorflow as tf

# Seed Python's `random`, NumPy and TensorFlow so that the initial weights,
# dropout masks and batch shuffling are the same on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Define the input shape: 9 time steps with 1 channel each.
# NOTE(review): this is hard-coded, so both feature matrices fed to this model
# must have exactly 9 columns -- confirm for KYOTO-2006 and UNSW-NB15.
input_shape = (9,1)

# Create a CNN with the different layers (Conv1D, MaxPooling, Dropout, and Dense).
# NOTE(review): with pool_size=1 (and the default stride, which equals the pool
# size) the pooling layers pass their input through unchanged, and with
# kernel_size=1 each convolution looks at one position at a time.
cnn = tf.keras.Sequential([
  tf.keras.layers.Conv1D(32, kernel_size=1, activation='relu', input_shape=input_shape),
  tf.keras.layers.MaxPooling1D(pool_size=1),
  tf.keras.layers.Conv1D(64, kernel_size=1, activation='relu'),
  tf.keras.layers.MaxPooling1D(pool_size=1),
  tf.keras.layers.Conv1D(128, kernel_size=1, activation='relu'),
  tf.keras.layers.MaxPooling1D(pool_size=1),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  # Single sigmoid unit: the model outputs one probability per sample.
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Define the learning rate.
learning_rate = 0.001

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate)

# Compile the model with binary crossentropy loss and the Adam optimizer.
cnn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Get the initial (untrained) weights of the architecture so the model can be
# reset to them later.
initial_weights = cnn.get_weights()

In [None]:
# Fit the CNN on the UNSW-NB15 training split.
cnn.fit(x=X_train_UNSW, y=y_train_UNSW, epochs=10, batch_size=64)

# Score the fitted model on the held-out UNSW-NB15 split. The result is
# unpacked as the loss followed by the accuracy metric set at compile time.
unsw_scores = cnn.evaluate(x=X_test_UNSW, y=y_test_UNSW)
loss, accuracy = unsw_scores
print('Test accuracy:', accuracy)

In [None]:
# Unfit the model.
# Restoring the saved weights alone is not a full reset: the Adam optimizer
# still holds the moment estimates and step count accumulated during the
# previous training run. Recompiling with a fresh optimizer clears that state
# so the KYOTO-2006 run starts from the same conditions as the first run.
cnn.set_weights(initial_weights)
cnn.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate), metrics=['accuracy'])

# Train the model on the KYOTO-2006 data.
cnn.fit(X_train_KYOTO, y_train_KYOTO, batch_size=64, epochs=10)

# Get the accuracy of the model on the test data.
# ('y_test_KOYTO' is the spelling used where the split is created.)
loss, accuracy = cnn.evaluate(X_test_KYOTO, y_test_KOYTO)
print('Test accuracy:', accuracy)