# CNN Model Training for Network Intrusion Detection
Using the UNSW-NB15 Dataset

This notebook demonstrates how to load the UNSW-NB15 dataset, preprocess it, build a CNN model, train and evaluate it, and save the model for later use.

In [None]:
# Import necessary libraries
import os

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical


## Load Dataset
This notebook assumes the UNSW-NB15 dataset has been downloaded and saved as `data/UNSW_NB15.csv`; update the path in the cell below if your file lives elsewhere.

In [None]:
# Read the dataset CSV into a DataFrame and preview it
data_path = 'data/UNSW_NB15.csv'  # Update path as needed
df = pd.read_csv(filepath_or_buffer=data_path)

# Last expression of the cell: show the first five rows
df.head(n=5)

## Preprocess Data
- Encode categorical features
- Scale numerical features
- Prepare input features and labels

In [None]:
# Turn the raw frame into train/test tensors for the Conv1D model.
# NOTE: this cell rewrites `df` in place. Re-run the load cell before
# re-running it; a second pass would find no object columns left and would
# leave `label_encoders` empty.

# Drop columns that are not useful or identifiers
df = df.drop(['id'], axis=1, errors='ignore')

# Encode categorical columns as integer codes, keeping each fitted encoder so
# the same mapping can be reapplied to new data later.
categorical_cols = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # astype(str) makes missing values the string 'nan' so they get a code too
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# Separate features and target.
# `attack_cat` (present in the published UNSW-NB15 CSVs; ignored if absent)
# names the attack class of each record, so it gives the label away and must
# not be fed to the model as an input feature.
y = df['label']
X = df.drop(['label', 'attack_cat'], axis=1, errors='ignore')

# Encode labels as one-hot vectors
y_encoded = to_categorical(y)

# Split BEFORE scaling so the held-out rows never influence the scaler
# statistics. Stratifying on the label keeps the class ratio the same in the
# training and test partitions.
X_train_df, X_test_df, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y
)

# Scale features: fit on the training rows only, then apply to the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_df)
X_test_scaled = scaler.transform(X_test_df)

# Reshape for Conv1D: (samples, timesteps, features=1)
X_train = np.expand_dims(X_train_scaled, axis=2)
X_test = np.expand_dims(X_test_scaled, axis=2)

# Full-dataset arrays, kept under their original names for any later cell
# that still refers to them (scaled with the train-fitted scaler).
X_scaled = scaler.transform(X)
X_reshaped = np.expand_dims(X_scaled, axis=2)


## Build CNN Model

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation, dropout masks and batch shuffling repeat across runs.
tf.keras.utils.set_random_seed(42)

# 1D CNN: two Conv1D/MaxPooling1D stages, then a dense head with dropout and
# a softmax output with one unit per one-hot label column.
model = Sequential([
    # Explicit input layer instead of the `input_shape=` layer argument,
    # which newer Keras releases warn about.
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(y_train.shape[1], activation='softmax')
])

# categorical_crossentropy matches the one-hot labels produced by to_categorical
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

## Train Model

In [None]:
# Train for 10 epochs in mini-batches of 64; validation_split=0.2 asks Keras
# to hold back a fifth of the training data for per-epoch validation.
fit_options = dict(epochs=10, batch_size=64, validation_split=0.2)
history = model.fit(X_train, y_train, **fit_options)

## Evaluate Model

In [None]:
# Score the trained model on the held-out test split. With the single
# 'accuracy' metric compiled in, evaluate() yields the loss and the accuracy.
evaluation = model.evaluate(X_test, y_test)
loss, accuracy = evaluation

# Report both figures, one per line
print(f"Test Loss: {loss}", f"Test Accuracy: {accuracy}", sep="\n")

## Save Model and Scaler

In [None]:
# Persist the trained network to an HDF5 file
model.save('cnn_nids_model.h5')

# Persist the preprocessing artifacts needed to prepare new data the same way
# the training data was prepared: the fitted scaler and the per-column label
# encoders. Without the encoders, raw categorical values cannot be mapped to
# the integer codes the model was trained on.
joblib.dump(scaler, 'scaler.save')
joblib.dump(label_encoders, 'label_encoders.save')