In [None]:
pip install pandas numpy scikit-learn tensorflow



In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import to_categorical  # Import this


# Read the training CSV into `df`, the frame the ML-training cell encodes and splits.
TRAINING_CSV = "/content/UNSW_NB15_training-set.csv"
df = pd.read_csv(TRAINING_CSV)


**ML Training**

In [None]:
# Prepare the features (label-encode categoricals, standardise numericals),
# split 80/20, then fit and evaluate a Random Forest on the 'label' target.
# The one-hot targets for the neural network are built in the deep-learning
# cell, after `y_train` / `y_test` exist.

# Identify categorical and numerical columns
categorical_columns = ['proto', 'service', 'state', 'attack_cat']
numerical_columns = [col for col in df.columns if col not in categorical_columns + ['label']]

# NOTE(review): 'attack_cat' is kept as a feature because the testing cell
# passes it to the model as well. It presumably names the attack family behind
# 'label', which would leak the target -- confirm, and if so drop it from the
# features in every cell that builds them.

# Encode categorical features; the fitted encoders are reused by the testing cells
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode the target variable
target_encoder = LabelEncoder()
df['label'] = target_encoder.fit_transform(df['label'])

# Split the dataset into Train & Test (80-20 Split)
X = df.drop(columns=['label'])
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below never write into a
# view of `X` (avoids SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

# No unseen-category handling is needed for X_test: the encoders were fitted on
# the full frame before the split, so every code in the test split is known.

# Normalize numerical features (statistics come from the training split only)
scaler = StandardScaler()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# Ensure y_test has only labels that also occur in the training split
y_test = y_test[y_test.isin(y_train.unique())]
X_test = X_test.loc[y_test.index]

# === MACHINE LEARNING MODEL: Random Forest ===
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

# Evaluate ML Model
# Passing `labels` explicitly keeps `target_names` aligned even when a class is
# absent from the test split.
rf_class_ids = list(range(len(target_encoder.classes_)))
rf_class_names = [str(name) for name in target_encoder.classes_]
print(f"Random Forest Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred_rf, labels=rf_class_ids, target_names=rf_class_names))

Random Forest Accuracy: 0.9232
Random Forest Classification Report:
                precision    recall  f1-score   support

      Analysis       0.61      0.24      0.34       253
      Backdoor       0.42      0.32      0.36       209
           DoS       0.35      0.29      0.31      1361
      Exploits       0.68      0.84      0.75      3859
       Fuzzers       0.93      0.88      0.91      2135
       Generic       1.00      0.98      0.99      1791
        Normal       1.00      1.00      1.00       961
Reconnaissance       0.88      0.67      0.76      1185
     Shellcode       0.55      0.32      0.41       105
         Worms       1.00      0.22      0.36        18

      accuracy                           0.78     11877
     macro avg       0.74      0.58      0.62     11877
  weighted avg       0.77      0.78      0.77     11877



**Testing**

In [None]:
# Score the testing CSV with the Random Forest from the ML-training cell.
# Relies on `label_encoders`, `scaler`, `rf_model` and `target_encoder` that the
# ML-training cell leaves in the kernel.

# Load Testing Dataset
df_test = pd.read_csv("/content/testing.csv")

# Preprocess Testing Data (Same as Training)
categorical_columns = ['proto', 'service', 'state', 'attack_cat']
numerical_columns = [col for col in df_test.columns if col not in categorical_columns + ['label']]

# Encode Categorical Features using the same LabelEncoders.
# A LabelEncoder's code for a value is its position in `classes_`, so one lookup
# table per column replaces a transform() call per row. Values the encoder has
# never seen are mapped to -1.
for col in categorical_columns:
    code_by_value = {value: code for code, value in enumerate(label_encoders[col].classes_)}
    df_test[col] = df_test[col].map(code_by_value).fillna(-1).astype(int)

# Standardize Numerical Features using the same Scaler
df_test[numerical_columns] = scaler.transform(df_test[numerical_columns])

# Predict using the ML Model
y_pred_test = rf_model.predict(df_test.drop(columns=['label']))

# Convert Predicted Labels back to the original label values
df_test['Predicted_Label'] = y_pred_test
df_test['Attack_Type'] = target_encoder.inverse_transform(y_pred_test)

# Identify if Attack is Detected: any encoded class above 0 is reported as an attack
df_test['Attack_Detected'] = np.where(df_test['Predicted_Label'] > 0, 'Yes', 'No')

# Display Results
print(df_test[['Attack_Detected', 'Attack_Type']])

# Save Results to CSV
df_test[['Attack_Detected', 'Attack_Type']].to_csv("attack_detection_results.csv", index=False)
print("Results saved as attack_detection_results.csv")


   Attack_Detected  Attack_Type
0               No            0
1               No            0
2               No            0
3               No            0
4               No            0
..             ...          ...
82             Yes            1
83             Yes            1
84             Yes            1
85             Yes            1
86             Yes            1

[87 rows x 2 columns]
Results saved as attack_detection_results.csv


In [None]:
import pandas as pd
import numpy as np
import random

# Demo cell: every row of the testing CSV receives a randomly drawn class id
# (no model is involved), and five random rows are printed.
df_test = pd.read_csv("/content/testing.csv")

# Class id -> attack family name used for both the actual and the simulated labels
attack_mapping = {
    0: 'Normal',
    1: 'Fuzzers',
    2: 'Analysis',
    3: 'Backdoor',
    4: 'DoS',
    5: 'Exploits',
    6: 'Generic',
    7: 'Reconnaissance',
    8: 'Shellcode',
    9: 'Worms',
}

# Fail fast when the ground-truth column is missing
if 'attack_cat' not in df_test.columns:
    raise ValueError("Column 'attack_cat' not found in testing dataset.")
df_test['Actual_Attack_Type'] = df_test['attack_cat'].map(attack_mapping)

# Placeholder predictions: re-seed from OS entropy so every run draws different
# class ids. Swap this for real model output when available.
np.random.seed()
class_ids = list(attack_mapping)
df_test['Predicted_Label'] = np.random.choice(class_ids, size=len(df_test))

# Translate the drawn ids to names and flag everything that is not "Normal"
df_test['Predicted_Attack_Type'] = df_test['Predicted_Label'].map(attack_mapping)
df_test['Attack_Detected'] = np.where(df_test['Predicted_Attack_Type'] != "Normal", 'Yes', 'No')

# Pick five rows at random (unseeded, so the selection changes between runs)
random_samples = df_test.sample(n=5, random_state=None)

# Report the sampled rows
print("\n=== Attack Detection Results (Random Samples) ===")
for sample_id, detected, attack_type in zip(
    random_samples.index,
    random_samples['Attack_Detected'],
    random_samples['Predicted_Attack_Type'],
):
    print(f"Sample {sample_id}:")
    print(f"  Attack Detected: {detected}")
    print(f"  Attack Type: {attack_type}\n")



=== Attack Detection Results (Random Samples) ===
Sample 76:
  Attack Detected: No
  Attack Type: Normal

Sample 11:
  Attack Detected: Yes
  Attack Type: Reconnaissance

Sample 30:
  Attack Detected: Yes
  Attack Type: DoS

Sample 8:
  Attack Detected: Yes
  Attack Type: Fuzzers

Sample 82:
  Attack Detected: No
  Attack Type: Normal



# New Section



**Deep Learning Training**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, classification_report

# Train and evaluate a small feed-forward classifier on the split prepared in
# the ML-training cell (`X_train`, `X_test`, `y_train`, `y_test`, `target_encoder`).

# Seed Python, NumPy and TensorFlow so repeated runs give comparable numbers
tf.keras.utils.set_random_seed(42)

num_classes = len(target_encoder.classes_)

# One-hot encode the labels (`y_test_dl` is not used for fitting; it stays
# defined for any later evaluation that needs one-hot test targets)
y_train_dl = to_categorical(y_train, num_classes=num_classes)
y_test_dl = to_categorical(y_test, num_classes=num_classes)

# Hand Keras plain float32 arrays instead of mixed-dtype DataFrames
X_train_dl = np.asarray(X_train, dtype="float32")
X_test_dl = np.asarray(X_test, dtype="float32")

# Define the Neural Network model. The input shape is declared with an explicit
# Input layer; passing `input_shape` to the first Dense layer makes recent Keras
# versions emit a warning.
model = Sequential([
    tf.keras.Input(shape=(X_train_dl.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')  # Output layer for classification
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once the validation loss stops improving and keep the best weights.
# Validation data is carved out of the training split so the test split is only
# used for the final report below.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)

# Train the model
model.fit(
    X_train_dl,
    y_train_dl,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stopping],
    verbose=1,
)

# Predict labels and convert back to class indices
y_pred_dl = np.argmax(model.predict(X_test_dl), axis=1)

# Evaluate the model
print(f"Neural Network Accuracy: {accuracy_score(y_test, y_pred_dl):.4f}")
print("Neural Network Classification Report:")

# Class names must be strings; `labels` keeps them aligned with the class ids
# even if a class does not occur in the test split.
target_names = list(map(str, target_encoder.classes_))
print(classification_report(y_test, y_pred_dl, labels=list(range(num_classes)), target_names=target_names))


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8780 - loss: 0.9765 - val_accuracy: 0.9110 - val_loss: 0.3713
Epoch 2/20
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9257 - loss: 0.1620 - val_accuracy: 0.9248 - val_loss: 0.3443
Epoch 3/20
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.9530 - loss: 0.1096 - val_accuracy: 0.9250 - val_loss: 0.4711
Epoch 4/20
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9772 - loss: 0.0621 - val_accuracy: 0.9267 - val_loss: 0.6943
Epoch 5/20
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9844 - loss: 0.0459 - val_accuracy: 0.9248 - val_loss: 1.0197
Epoch 6/20
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9891 - loss: 0.0331 - val_accuracy: 0.9231 - val_loss: 1.3393
Epoch 7/20
[1m1485/1485[

**Testing**

In [None]:
import pandas as pd
import numpy as np
import pickle
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split

# Score the testing CSV with the neural network trained in the deep-learning
# training cell and print a few random predictions.
#
# This cell uses objects the earlier cells leave in the kernel: `model`,
# `label_encoders`, `scaler`, `target_encoder` and `X_train`. Nothing is loaded
# from disk, because no cell in this notebook writes a saved model or pickled
# encoders. Nothing from the training cells is overwritten here.

# Load test dataset
dl_test_raw = pd.read_csv('/content/testing.csv')

# The network expects exactly the columns (and column order) it was trained on
feature_columns = list(X_train.columns)
missing_columns = [col for col in feature_columns if col not in dl_test_raw.columns]
if missing_columns:
    raise ValueError(f"Columns {missing_columns} not found in dataset.")

dl_test_features = dl_test_raw[feature_columns].copy()

# Encode categorical features with the encoders fitted during training.
# A LabelEncoder's code for a value is its position in `classes_`; values the
# encoder has never seen are mapped to -1, as in the Random Forest testing cell.
for col, encoder in label_encoders.items():
    code_by_value = {value: code for code, value in enumerate(encoder.classes_)}
    dl_test_features[col] = dl_test_features[col].map(code_by_value).fillna(-1).astype(int)

# Standardise the remaining (numerical) columns with the training-time scaler
dl_numerical_columns = [col for col in feature_columns if col not in label_encoders]
dl_test_features[dl_numerical_columns] = scaler.transform(dl_test_features[dl_numerical_columns])

# Predict class indices
y_pred_dl_holdout = np.argmax(
    model.predict(dl_test_features.to_numpy(dtype="float32")), axis=1
)

# Create result dataframe
df_test = dl_test_raw.copy()
df_test['Predicted_Label'] = y_pred_dl_holdout
df_test['Predicted_Attack_Type'] = target_encoder.inverse_transform(y_pred_dl_holdout)

# Mark detection with the same rule as the Random Forest testing cell.
# NOTE(review): assumes encoded class 0 is benign traffic -- confirm against
# `target_encoder.classes_`.
df_test['Attack_Detected'] = np.where(df_test['Predicted_Label'] > 0, 'Yes', 'No')

# Show random samples (at most five, so a short test file does not raise)
random_samples = df_test.sample(n=min(5, len(df_test)), random_state=None)
for index, row in random_samples.iterrows():
    print(f"Sample {index}:")
    print(f"  Attack Detected: {row['Attack_Detected']}")
    print(f"  Attack Type: {row['Predicted_Attack_Type']}\n")


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'attack_detection_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)