In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Load the NSL-KDD dataset and name its columns
def load_data(filepath):
    """Read a header-less NSL-KDD CSV file and attach column names.

    Args:
        filepath: Location of the CSV file; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The file contents with its 42 columns named after
        the 41 features listed below, followed by ``label``.

    Raises:
        ValueError: Raised by pandas when the file does not contain exactly
            42 columns, because the name list cannot be assigned.
    """
    # The name list is assembled from the customary KDD'99 feature groups;
    # the concatenation order must match the column order in the file.
    basic_features = [
        'duration', 'protocol_type', 'service', 'flag', 'src_bytes',
        'dst_bytes', 'land', 'wrong_fragment', 'urgent',
    ]
    content_features = [
        'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
        'root_shell', 'su_attempted', 'num_root', 'num_file_creations',
        'num_shells', 'num_access_files', 'num_outbound_cmds',
        'is_host_login', 'is_guest_login',
    ]
    traffic_features = [
        'count', 'srv_count', 'serror_rate', 'srv_serror_rate',
        'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate',
        'srv_diff_host_rate',
    ]
    host_features = [
        'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
        'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
        'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
        'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
        'dst_host_srv_rerror_rate',
    ]
    column_names = (
        basic_features + content_features + traffic_features + host_features
        + ['label']
    )

    # header=None: the first line of the file is data, not column names.
    frame = pd.read_csv(filepath, header=None)
    frame.columns = column_names
    return frame

# Map labels for different classification tasks
# Attack name -> coarse category for the 5-class task. Besides the attacks
# found in the NSL-KDD training split, this also lists the attacks that only
# occur in the test split, so that test labels do not silently become NaN.
_ATTACKS_BY_CATEGORY = {
    'normal': ('normal',),
    'DoS': ('back', 'land', 'neptune', 'pod', 'smurf', 'teardrop',
            'apache2', 'mailbomb', 'processtable', 'udpstorm', 'worm'),
    'Probe': ('ipsweep', 'nmap', 'portsweep', 'satan', 'mscan', 'saint'),
    'R2L': ('ftp_write', 'guess_passwd', 'imap', 'multihop', 'phf', 'spy',
            'warezclient', 'warezmaster', 'httptunnel', 'named', 'sendmail',
            'snmpgetattack', 'snmpguess', 'xlock', 'xsnoop'),
    'U2R': ('buffer_overflow', 'loadmodule', 'perl', 'rootkit', 'ps',
            'sqlattack', 'xterm'),
}
_ATTACK_TO_CATEGORY = {
    attack: category
    for category, attacks in _ATTACKS_BY_CATEGORY.items()
    for attack in attacks
}


def map_labels(data, task='2-class'):
    """Collapse the raw attack names in ``data['label']`` for a given task.

    The ``label`` column of ``data`` is overwritten in place and the same
    DataFrame object is returned.

    Args:
        data: DataFrame with a ``label`` column holding raw attack names
            (``'normal'``, ``'neptune'``, ...).
        task: ``'2-class'`` maps every label other than ``'normal'`` to
            ``'anomaly'``; ``'5-class'`` maps each attack to one of
            ``normal``/``DoS``/``Probe``/``R2L``/``U2R``. Any other value
            leaves ``data`` untouched.

    Returns:
        The DataFrame that was passed in, with ``label`` rewritten.

    Raises:
        ValueError: For the 5-class task, when a label has no known category
            (this includes missing labels). Failing here avoids NaN labels
            reaching the encoder and the model.
    """
    if task == '2-class':
        # Keep 'normal' where it matches; everything else becomes 'anomaly'.
        data['label'] = data['label'].where(data['label'] == 'normal', 'anomaly')
    elif task == '5-class':
        mapped = data['label'].map(_ATTACK_TO_CATEGORY)
        unmapped = mapped.isna()
        if unmapped.any():
            unknown = sorted(data.loc[unmapped, 'label'].astype(str).unique())
            raise ValueError(
                f"No 5-class category known for label(s): {unknown}"
            )
        data['label'] = mapped
    return data

# Preprocess data
def preprocess_data(data):
    """Integer-encode the categorical feature columns and the label column.

    The columns ``protocol_type``, ``service``, ``flag`` and ``label`` of
    ``data`` are overwritten in place with the codes produced by a
    ``LabelEncoder`` fitted on that same column.

    Args:
        data: DataFrame containing the four columns named above.

    Returns:
        tuple: ``(data, label_encoder)`` where ``data`` is the DataFrame that
        was passed in and ``label_encoder`` is the encoder fitted on the
        ``label`` column. The encoders used for the three feature columns
        are not kept.
    """
    # Each feature gets its own throw-away encoder fitted on this frame only.
    for column in ('protocol_type', 'service', 'flag'):
        data[column] = LabelEncoder().fit_transform(data[column])

    # The label encoder is returned so callers can recover the class names.
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(data['label'])
    data['label'] = encoded_labels
    return data, label_encoder

# Build a neural network model
def build_model(input_dim, num_classes):
    """Build and compile a small fully connected softmax classifier.

    Architecture: Input -> Dense(128, relu) -> Dropout(0.5) ->
    Dense(64, relu) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss (so targets must be one-hot encoded)
        and accuracy as the reported metric.
    """
    # Imported locally so this function does not depend on a change to the
    # file-level import list.
    from tensorflow.keras import Input

    model = Sequential([
        # An explicit Input layer replaces the `input_dim=` argument on the
        # first Dense layer, which recent Keras versions warn about.
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Calculate False Alarm Rate (FAR)
def calculate_far(conf_matrix):
    """Return the macro-averaged false alarm rate of a confusion matrix.

    For each class the false alarm rate is ``FP / (FP + TN)``, where rows of
    the matrix are true classes and columns are predicted classes. The
    per-class rates are averaged with equal weight.

    Args:
        conf_matrix: Square 2-D confusion matrix (NumPy array or nested
            sequence of counts).

    Returns:
        float: Mean of the per-class false alarm rates. Classes with no
        negative samples (``FP + TN == 0``) have an undefined rate and are
        left out of the mean; if no class has a defined rate, ``0.0`` is
        returned.

    Raises:
        ValueError: If ``conf_matrix`` is not a square 2-D matrix.
    """
    cm = np.asarray(conf_matrix, dtype=float)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(
            f"conf_matrix must be a square 2-D matrix, got shape {cm.shape}"
        )

    # Column total minus the diagonal = samples wrongly predicted as the class.
    false_positives = cm.sum(axis=0) - np.diag(cm)
    # FP + TN is every sample whose true class is a different one,
    # i.e. the grand total minus the class's own row total.
    negatives = cm.sum() - cm.sum(axis=1)

    # Skip classes with zero negatives instead of dividing 0 by 0 (NaN).
    defined = negatives > 0
    if not defined.any():
        return 0.0
    return float((false_positives[defined] / negatives[defined]).mean())

# Train and evaluate the model
def train_and_evaluate(train_data, test_data, task):
    """Train the neural network on one split and report metrics on the other.

    Both frames are modified by the label mapping step, so pass copies if the
    originals are needed afterwards.

    Args:
        train_data: Raw training DataFrame (feature columns plus ``label``).
        test_data: Raw test DataFrame with the same columns.
        task: Task name forwarded to ``map_labels`` and used in the printed
            result header.

    Returns:
        dict: ``confusion_matrix`` (array), ``accuracy`` (float) and ``far``
        (false alarm rate as computed by ``calculate_far``). The same values
        are also printed.
    """
    # Map labels for the task
    train_data = map_labels(train_data, task)
    test_data = map_labels(test_data, task)

    # Encode both splits in one pass. Fitting separate encoders per split
    # would let the same integer code mean different categories (or classes)
    # in train and test whenever their value sets differ.
    n_train = len(train_data)
    combined = pd.concat([train_data, test_data], ignore_index=True)
    combined, label_encoder = preprocess_data(combined)
    train_data = combined.iloc[:n_train]
    test_data = combined.iloc[n_train:]

    # Split data into features and labels
    X_train, y_train = train_data.drop(columns=['label']), train_data['label']
    X_test, y_test = test_data.drop(columns=['label']), test_data['label']

    # Standardize features using statistics from the training split only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Fix the class set from the encoder so the output layer, the one-hot
    # targets and the metrics all agree, even if a class is absent from a split.
    num_classes = len(label_encoder.classes_)
    class_ids = np.arange(num_classes)
    class_names = [str(name) for name in label_encoder.classes_]
    y_train_encoded = to_categorical(y_train, num_classes=num_classes)

    # Build and train the model
    model = build_model(X_train.shape[1], num_classes)
    model.fit(X_train, y_train_encoded, epochs=20, batch_size=32, verbose=1)

    # Evaluate the model
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Compute metrics
    cm = confusion_matrix(y_test, y_pred_classes, labels=class_ids)
    far = calculate_far(cm)
    accuracy = accuracy_score(y_test, y_pred_classes)
    print(f"\n--- Results for {task} ---")
    print("Confusion Matrix:\n", cm)
    # zero_division=0 reports 0 for classes that were never predicted
    # instead of emitting an undefined-metric warning for each of them.
    print(classification_report(
        y_test, y_pred_classes,
        labels=class_ids, target_names=class_names, zero_division=0,
    ))
    print(f"Accuracy: {accuracy}")
    print(f"False Alarm Rate (FAR): {far}")

    return {'confusion_matrix': cm, 'accuracy': accuracy, 'far': far}

# Main workflow
if __name__ == "__main__":
    # The training and test splits are stored in separate CSV files.
    train_filepath = "C:\\Users\\DELL 5540\\Desktop\\NIDS\\NSL_KDD_Train.csv"
    test_filepath = "C:\\Users\\DELL 5540\\Desktop\\NIDS\\NSL_KDD_Test.csv"
    train_data, test_data = (
        load_data(path) for path in (train_filepath, test_filepath)
    )

    # Run the binary task first, then the five-category task. Each run gets
    # fresh copies because the pipeline rewrites the frames it is given.
    for task in ('2-class', '5-class'):
        print(f"\n--- Task: {task} ---")
        train_and_evaluate(train_data.copy(), test_data.copy(), task)



--- Task: 2-class ---
Epoch 1/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.3168 - val_loss: 0.0947
Epoch 2/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1040 - val_loss: 0.0505
Epoch 3/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0817 - val_loss: 0.0554
Epoch 4/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0795 - val_loss: 0.0487
Epoch 5/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1007 - val_loss: 0.0798
Epoch 6/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0748 - val_loss: 0.0407
Epoch 7/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1124 - val_loss: 0.0675
Epoch 8/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0650 - val_loss: 0.0268
E

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9336 - loss: 0.1773
Epoch 2/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 991us/step - accuracy: 0.9759 - loss: 0.0643
Epoch 3/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 966us/step - accuracy: 0.9810 - loss: 0.0514
Epoch 4/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 953us/step - accuracy: 0.9810 - loss: 0.0501
Epoch 5/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 947us/step - accuracy: 0.9827 - loss: 0.0448
Epoch 6/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 978us/step - accuracy: 0.9832 - loss: 0.0433
Epoch 7/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 978us/step - accuracy: 0.9845 - loss: 0.0413
Epoch 8/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 975us/step - accuracy: 0.9838 - loss: 0.0404
Epoch 9/20
[1m3937/3

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9152 - loss: 0.2813
Epoch 2/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9774 - loss: 0.0710
Epoch 3/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9813 - loss: 0.0591
Epoch 4/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9823 - loss: 0.0563
Epoch 5/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9827 - loss: 0.0514
Epoch 6/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 999us/step - accuracy: 0.9832 - loss: 0.0508
Epoch 7/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9836 - loss: 0.0475
Epoch 8/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9849 - loss: 0.0467
Epoch 9/20
[1m3937/3937[0m [32

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.3482 - val_loss: 0.0977
Epoch 2/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1328 - val_loss: 0.0578
Epoch 3/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0823 - val_loss: 0.0442
Epoch 4/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1287 - val_loss: 0.0268
Epoch 5/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0659 - val_loss: 0.0345
Epoch 6/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0891 - val_loss: 0.0189
Epoch 7/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0597 - val_loss: 0.0245
Epoch 8/20
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0490 - val_loss: 0.0251
Epoch 9/20
[1m3150/3150

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.8760 - loss: 0.5479
Epoch 2/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9587 - loss: 0.1328
Epoch 3/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9681 - loss: 0.1037
Epoch 4/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9741 - loss: 0.0893
Epoch 5/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9752 - loss: 0.0827
Epoch 6/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9753 - loss: 0.0813
Epoch 7/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9767 - loss: 0.0760
Epoch 8/20
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9786 - loss: 0.0736
Epoch 9/20
[1m3937/3937[0m [32m━

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
