## GENETIK ALGORITMA


In [209]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import pygad
import warnings

In [210]:
warnings.filterwarnings('ignore')

# Load datasets
kdd_train = pd.read_csv('/Users/dandyantariksa/Downloads/RM-GA/KDDTrain+.txt', header=None)
kdd_test = pd.read_csv('/Users/dandyantariksa/Downloads/RM-GA/KDDTest+.txt', header=None)

kdd_train = kdd_train.iloc[:,:-1]
kdd_test = kdd_test.iloc[:,:-1]

In [211]:
columns = (['duration'
,'protocol_type'
,'service'
,'flag'
,'src_bytes'
,'dst_bytes'
,'land'
,'wrong_fragment'
,'urgent'
,'hot'
,'num_failed_logins'
,'logged_in'
,'num_compromised'
,'root_shell'
,'su_attempted'
,'num_root'
,'num_file_creations'
,'num_shells'
,'num_access_files'
,'num_outbound_cmds'
,'is_host_login'
,'is_guest_login'
,'count'
,'srv_count'
,'serror_rate'
,'srv_serror_rate'
,'rerror_rate'
,'srv_rerror_rate'
,'same_srv_rate'
,'diff_srv_rate'
,'srv_diff_host_rate'
,'dst_host_count'
,'dst_host_srv_count'
,'dst_host_same_srv_rate'
,'dst_host_diff_srv_rate'
,'dst_host_same_src_port_rate'
,'dst_host_srv_diff_host_rate'
,'dst_host_serror_rate'
,'dst_host_srv_serror_rate'
,'dst_host_rerror_rate'
,'dst_host_srv_rerror_rate'
,'attack'])

kdd_train.columns = columns
kdd_test.columns = columns

In [212]:
# Label encoding for categorical variables
label_encoders = {}
for column in ['protocol_type', 'flag', 'service']:
    le = LabelEncoder()
    kdd_train[column] = le.fit_transform(kdd_train[column])
    kdd_test[column] = le.transform(kdd_test[column])
    label_encoders[column] = le

In [213]:
# Feature scaling
log_features = ['duration', 'src_bytes', 'dst_bytes']
kdd_train[log_features] = kdd_train[log_features].apply(lambda x: np.log1p(x))
kdd_test[log_features] = kdd_test[log_features].apply(lambda x: np.log1p(x))

minmax = MinMaxScaler()
minmax.fit(kdd_train[log_features])
kdd_train[log_features] = minmax.transform(kdd_train[log_features])
kdd_test[log_features] = minmax.transform(kdd_test[log_features])

In [214]:
# Define the training function
def train_RF(num_epochs, epsilon, decline_limit):
    model = RandomForestClassifier()  # Ganti ini jika diperlukan
    X = kdd_train.drop('attack', axis=1)
    y = kdd_train['attack']
    accuracies = cross_val_score(model, X, y, cv=5)  # 5-fold cross-validation
    final_accuracy = np.mean(accuracies)
    return final_accuracy, None, None  # Ubah None sesuai kebutuhan

In [215]:
# Genetic Algorithm for optimizing hyperparameters
def fitness_func(ga_instance, solution, solution_idx):
    epsilon, decline_limit = solution
    final_accuracy, _, _ = train_RF(num_epochs=1, epsilon=epsilon, decline_limit=decline_limit)
    return final_accuracy


In [216]:
# Define gene space for Genetic Algorithm
gene_space = [
    [0.1, 0.2],  # epsilon
    [1, 2]       # decline_limit
]


In [217]:
# Setting up the Genetic Algorithm
ga_instance = pygad.GA(
    num_generations=2,
    num_parents_mating=3,
    fitness_func=fitness_func,
    sol_per_pop=10,
    num_genes=2,
    gene_space=gene_space,
    parent_selection_type="sss",
    crossover_type="single_point",
    mutation_type="random",
    mutation_probability=0.1
)

In [218]:
# Run the Genetic Algorithm
ga_instance.run()



In [219]:
# Retrieve the best solution from the GA
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print(f"Best solution: epsilon={solution[0]}, decline_limit={solution[1]}")
print(f"Fitness value of the best solution: {solution_fitness}")

Best solution: epsilon=0.2, decline_limit=1.0
Fitness value of the best solution: 0.998531431337906


## NEUTRAL NETWORKS

In [220]:
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import recall_score

In [221]:
kdd_combined = pd.concat([kdd_train, kdd_test], keys=['train', 'test'])


In [222]:
# Mengonversi kolom 'attack' ke format numerik
label_encoder = LabelEncoder()
kdd_combined['attack'] = label_encoder.fit_transform(kdd_combined['attack'])

In [223]:
# Pisahkan kembali data pelatihan dan pengujian
y_train = kdd_combined.xs('train')['attack'].values
y_test = kdd_combined.xs('test')['attack'].values

In [224]:

# Mengambil fitur (X) dari dataset
x_train = kdd_combined.xs('train').drop('attack', axis=1).values
x_test = kdd_combined.xs('test').drop('attack', axis=1).values


In [225]:

# Mengambil fitur (X) dari dataset
x_train = kdd_combined.xs('train').drop('attack', axis=1).values
x_test = kdd_combined.xs('test').drop('attack', axis=1).values


In [226]:
# Model Neural Network
model_nn = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu', input_shape=(x_train.shape[1],),
                          kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                          bias_regularizer=tf.keras.regularizers.L2(1e-4),
                          activity_regularizer=tf.keras.regularizers.L2(1e-5)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=128, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                          bias_regularizer=tf.keras.regularizers.L2(1e-4),
                          activity_regularizer=tf.keras.regularizers.L2(1e-5)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=512, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                          bias_regularizer=tf.keras.regularizers.L2(1e-4),
                          activity_regularizer=tf.keras.regularizers.L2(1e-5)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=128, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
                          bias_regularizer=tf.keras.regularizers.L2(1e-4),
                          activity_regularizer=tf.keras.regularizers.L2(1e-5)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),  # Sesuaikan dengan binary classification
])


In [227]:
# Compile the model
model_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [228]:
# Melatih model
model_nn.fit(x_train, y_train, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.0078 - loss: -43729864.0000
Epoch 2/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.0078 - loss: -514535040.0000
Epoch 3/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.0072 - loss: -1410443264.0000
Epoch 4/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.0071 - loss: -2705908992.0000
Epoch 5/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.0076 - loss: -4348161536.0000
Epoch 6/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.0077 - loss: -6361755136.0000
Epoch 7/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.0076 - loss: -8759425024.0000
Epoch 8/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x15c5d7e80>

In [229]:
# Prediksi pada data pelatihan
y_train_pred_nn = model_nn.predict(x_train)
y_train_pred_nn = (y_train_pred_nn > 0.5).astype(int).flatten()

# Prediksi pada data pengujian
y_test_pred_nn = model_nn.predict(x_test)
y_test_pred_nn = (y_test_pred_nn > 0.5).astype(int).flatten()

[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [232]:
from sklearn.metrics import recall_score

# Menghitung recall dengan average='macro'
nn_train_recall = recall_score(y_train, y_train_pred_nn, average='macro')
nn_test_recall = recall_score(y_test, y_test_pred_nn, average='macro')

print(f'Recall (Training): {nn_train_recall}')
print(f'Recall (Testing): {nn_test_recall}')

Recall (Training): 0.043478260869565216
Recall (Testing): 0.02631578947368421


In [235]:
# Plotting the results
labels = ['NN Train', 'NN Test']
recalls = [nn_train_recall, nn_test_recall]