### import packages

In [8]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout


### load the dataset

In [3]:

# List of column names based on the provided dataset documentation
column_names = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'label'
]

# Load the dataset
kdd_data_10_percent = pd.read_csv('kddcup.data_10_percent_corrected', header=None, names=column_names)

# Display the first few rows of the DataFrame to verify
kdd_data_10_percent.head()



### Separate features and label


In [9]:
X = kdd_data_10_percent.drop('label', axis=1)
y = kdd_data_10_percent['label']

### Convert String Labels to Numeric Values

In [10]:
# Convert string labels to numeric values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Check the unique classes to ensure binary classification
print(f"Unique classes in the target variable: {label_encoder.classes_}")


Unique classes in the target variable: ['back.' 'buffer_overflow.' 'ftp_write.' 'guess_passwd.' 'imap.'
 'ipsweep.' 'land.' 'loadmodule.' 'multihop.' 'neptune.' 'nmap.' 'normal.'
 'perl.' 'phf.' 'pod.' 'portsweep.' 'rootkit.' 'satan.' 'smurf.' 'spy.'
 'teardrop.' 'warezclient.' 'warezmaster.']


### Identify Symbolic and Continuous Features

In [11]:
# Identify symbolic (categorical) and continuous features
symbolic_features = ['protocol_type', 'service', 'flag', 'land', 'logged_in', 'is_host_login', 'is_guest_login']
continuous_features = [col for col in column_names if col not in symbolic_features + ['label']]

### Preprocess the Data

In [12]:
# Preprocess the data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), continuous_features),
        ('cat', OneHotEncoder(), symbolic_features)
    ])

# Fit and transform the features
X_processed = preprocessor.fit_transform(X)


### Define the Model

In [13]:
# Define the model
model = Sequential()
model.add(Dense(64, input_dim=X_processed.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Train the Model

In [14]:
history = model.fit(X_processed, y, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m12351/12351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2ms/step - accuracy: 6.6477e-05 - loss: -77438512.0000 - val_accuracy: 1.3157e-04 - val_loss: -923578304.0000
Epoch 2/10
[1m12351/12351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 2.9880e-05 - loss: -1783518336.0000 - val_accuracy: 1.3157e-04 - val_loss: -5544832000.0000
Epoch 3/10
[1m12351/12351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - accuracy: 4.3839e-05 - loss: -7920915968.0000 - val_accuracy: 1.3157e-04 - val_loss: -16398288896.0000
Epoch 4/10
[1m12351/12351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 3ms/step - accuracy: 3.3294e-05 - loss: -21113147392.0000 - val_accuracy: 1.3157e-04 - val_loss: -35922952192.0000
Epoch 5/10
[1m12351/12351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3ms/step - accuracy: 5.2738e-05 - loss: -43752718336.0000 - val_accuracy: 1.3157e-04 - val_loss: -66718179328.0000
Epoch 6/10
[1m12351/12

###  Display Training History

In [15]:
history.history 

{'accuracy': [4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05,
  4.301445369492285e-05],
 'loss': [-270297696.0,
  -2882086144.0,
  -10598487040.0,
  -25925492736.0,
  -51440930816.0,
  -89805242368.0,
  -143659057152.0,
  -215347462144.0,
  -307805585408.0,
  -423062306816.0],
 'val_accuracy': [0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193,
  0.00013157229113858193],
 'val_loss': [-923578304.0,
  -5544832000.0,
  -16398288896.0,
  -35922952192.0,
  -66718179328.0,
  -111330402304.0,
  -172307988480.0,
  -252099133440.0,
  -353233043456.0,
  -478172413952.0]}

### serialize model

In [16]:
model.save('nids_model.h5')

