In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv('/content/kdd_test.csv')  # or your full dataset
df.columns = [f"col_{i}" for i in range(len(df.columns)-1)] + ['label']

# Keep copy of original label
df['attack_type'] = df['label']

# Encode categorical columns
for col in df.select_dtypes(include='object').columns:
    if col != 'label':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])

# Convert label to 'attack' or 'normal'
df['label'] = df['label'].apply(lambda x: 'normal' if 'normal' in x else 'attack')

# Encode label
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Split data
X = df.drop(['label', 'attack_type'], axis=1)
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
model = RandomForestClassifier()
model.fit(X_train_scaled, y_train)

# Evaluate accuracy on test data
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"ML Model Accuracy: {accuracy * 100:.2f}%")

# Save everything
joblib.dump(model, 'ml_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')




ML Model Accuracy: 98.82%


['label_encoder.pkl']

In [2]:
import pandas as pd

# Load your dataset
df = pd.read_csv('/content/kdd_test.csv')  # replace with your file path

# Reduce to 1000 rows randomly
df_reduced = df.sample(n=1000, random_state=42)  # random_state for reproducibility

# Save the reduced dataset
df_reduced.to_csv('testing_dataset.csv', index=False)


In [3]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder

# Load the testing dataset
df = pd.read_csv('/content/testing_dataset.csv')
df.columns = [f"col_{i}" for i in range(len(df.columns)-1)] + ['label']

# ✅ Store original attack names before any encoding
original_attacks = df['label'].values.copy()

# Encode categorical columns (skip label for now)
for col in df.select_dtypes(include='object').columns:
    if col != 'label':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])

# Binary label: 'normal' or 'attack'
df['label'] = df['label'].apply(lambda x: 'normal' if 'normal' in x else 'attack')

# Load saved objects
label_encoder = joblib.load('label_encoder.pkl')
scaler = joblib.load('scaler.pkl')
ml_model = joblib.load('ml_model.pkl')

# Encode 'label' column: normal=1, attack=0
df['label'] = label_encoder.transform(df['label'])

# Prepare feature set
X_test = df.drop(['label'], axis=1)
X_test_scaled = scaler.transform(X_test)

# 🎯 Select 3 random samples
sample_df = df.sample(n=3, random_state=42)
sample_indices = sample_df.index
sample_X_scaled = X_test_scaled[sample_indices]

# Predict
sample_preds = ml_model.predict(sample_X_scaled)
sample_attack_names = original_attacks[sample_indices]  # ✅ Use real names

# 🖨️ Display results
print("\n🔍 ML Predictions on 3 Random Samples:")
for i, (pred, attack_name) in enumerate(zip(sample_preds, sample_attack_names)):
    status = "✅ Normal" if pred == 1 else "❌ Attack Detected"
    print(f"Sample {i+1}: {status} | Attack Type: {attack_name}")



🔍 ML Predictions on 3 Random Samples:
Sample 1: ✅ Normal | Attack Type: normal
Sample 2: ✅ Normal | Attack Type: normal
Sample 3: ❌ Attack Detected | Attack Type: warezclient


**DEEP LEARNING**

In [10]:
import pandas as pd
import joblib
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Load dataset
df = pd.read_csv('/content/kdd_test.csv')  # Or use your full training dataset
df.columns = [f"col_{i}" for i in range(len(df.columns)-1)] + ['label']

# Save a copy of attack type
df['attack_type'] = df['label']

# Encode categorical features
for col in df.select_dtypes(include='object').columns:
    if col != 'label':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])

# Encode label as attack or normal
df['label'] = df['label'].apply(lambda x: 'normal' if 'normal' in x else 'attack')

# Label encode the target column
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])  # normal=1, attack=0

# Features and target
X = df.drop(['label', 'attack_type'], axis=1)
y = df['label']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define DL model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=150, batch_size=32, validation_split=0.2)

# Save model and encoders
model.save('dl_model.h5')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

print("✅ DL Model, Scaler, and Label Encoder saved!")


Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.8278 - loss: 0.3659 - val_accuracy: 0.9315 - val_loss: 0.1523
Epoch 2/150
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9315 - loss: 0.1591 - val_accuracy: 0.9515 - val_loss: 0.1171
Epoch 3/150
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9519 - loss: 0.1210 - val_accuracy: 0.9556 - val_loss: 0.0991
Epoch 4/150
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9579 - loss: 0.1056 - val_accuracy: 0.9673 - val_loss: 0.0938
Epoch 5/150
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9614 - loss: 0.1018 - val_accuracy: 0.9665 - val_loss: 0.0865
Epoch 6/150
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9611 - loss: 0.0994 - val_accuracy: 0.9673 - val_loss: 0.0843
Epoch 7/150
[1m451/451[0m [32m━



✅ DL Model, Scaler, and Label Encoder saved!


In [11]:
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# Load the testing dataset
df = pd.read_csv('/content/testing_dataset.csv')
df.columns = [f"col_{i}" for i in range(len(df.columns)-1)] + ['label']

# Save original attack type names
original_attacks = df['label'].values.copy()

# Encode categorical columns (not 'label')
for col in df.select_dtypes(include='object').columns:
    if col != 'label':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])

# Binary classification label
df['label'] = df['label'].apply(lambda x: 'normal' if 'normal' in x else 'attack')

# Load saved encoders and model
label_encoder = joblib.load('label_encoder.pkl')
scaler = joblib.load('scaler.pkl')
dl_model = load_model('dl_model.h5')  # Change name if yours is different

# Encode label
df['label'] = label_encoder.transform(df['label'])

# Prepare features
X_test = df.drop(['label'], axis=1)
X_test_scaled = scaler.transform(X_test)

# Pick 3 random test samples
sample_df = df.sample(n=10, random_state=42)
sample_indices = sample_df.index
sample_X_scaled = X_test_scaled[sample_indices]
sample_attack_names = original_attacks[sample_indices]

# Predict with DL model
sample_preds_probs = dl_model.predict(sample_X_scaled)
sample_preds = (sample_preds_probs > 0.5).astype(int).flatten()

# 🖨️ Show output
print("\n🔍 DL Predictions on 3 Random Samples:")
for i, (pred, attack_name) in enumerate(zip(sample_preds, sample_attack_names)):
    status = "Normal" if pred == 1 else " Attack Detected"
    print(f"Sample {i+1}: {status} | Attack Type: {attack_name}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step

🔍 DL Predictions on 3 Random Samples:
Sample 1: Normal | Attack Type: normal
Sample 2: Normal | Attack Type: normal
Sample 3:  Attack Detected | Attack Type: warezclient
Sample 4:  Attack Detected | Attack Type: neptune
Sample 5: Normal | Attack Type: normal
Sample 6: Normal | Attack Type: normal
Sample 7: Normal | Attack Type: normal
Sample 8: Normal | Attack Type: normal
Sample 9:  Attack Detected | Attack Type: smurf
Sample 10:  Attack Detected | Attack Type: neptune
