**MACHINE LEARNING**

In [None]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train and persist the binary (normal vs. attack) Random Forest detector:
# load the CSV, binarise the label, split, standardise, fit, report hold-out
# accuracy, then save the model together with its scaler and label encoder.

# Load dataset
df = pd.read_csv('/content/dataset_30000.csv')  # Update path as needed

# Optional: Rename columns if needed (if already named, skip this)
# df.columns = [f"col_{i}" for i in range(len(df.columns)-1)] + ['label']

# Keep the original (multi-class) label for reference before it is binarised.
df['attack_type'] = df['Label']

# Drop identifier/timestamp columns; errors='ignore' skips any that are absent.
drop_cols = ['Flow ID', 'Source IP', 'Destination IP', 'Timestamp']
df = df.drop(columns=drop_cols, errors='ignore')

# Binarise the label.
# The notebook's recorded prediction output shows benign traffic labelled
# 'BENIGN', which the original "'Normal' in label" test never matches; every
# row then becomes 'attack' and the model is trained on a single class.
# Labels containing 'Normal' are still accepted, so existing data keeps working.
label_text = df['Label'].astype(str)
is_normal = (
    label_text.str.contains('Normal', regex=False)
    | label_text.str.lower().str.contains('benign', regex=False)
)
df['Label'] = is_normal.map({True: 'normal', False: 'attack'})

# A single-class target makes the accuracy below meaningless (always 100%),
# so stop here instead of silently saving a useless model.
if df['Label'].nunique() < 2:
    raise ValueError(
        "Label column collapsed to a single class after binarisation; "
        "check how benign/normal traffic is spelled in the dataset."
    )

# Encode label. LabelEncoder sorts classes, so attack = 0 and normal = 1.
label_encoder = LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Label'])

# Split features and labels
X = df.drop(['Label', 'attack_type'], axis=1)
y = df['Label']

# Train/test split (fixed seed so the hold-out set is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training portion only to avoid leaking
# hold-out statistics into the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Random Forest model (seeded so repeated runs give the same forest)
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate accuracy on the hold-out set
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"ML Model Accuracy: {accuracy * 100:.2f}%")

# Save model, scaler, and label encoder for the inference cell
joblib.dump(model, 'ml_model_friday.pkl')
joblib.dump(scaler, 'scaler_friday.pkl')
joblib.dump(label_encoder, 'label_encoder_friday.pkl')


ML Model Accuracy: 100.00%


['label_encoder_friday.pkl']

In [None]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder

# Load the testing dataset
df = pd.read_csv('/content/testing_fri.csv')  # Update path if needed

# ✅ Store original attack names before label transformation
original_attacks = df['Label'].values.copy()

# Drop non-feature columns
drop_cols = ['Flow ID', 'Source IP', 'Destination IP', 'Timestamp']
df.drop(columns=[col for col in drop_cols if col in df.columns], inplace=True, errors='ignore')

# Convert label to binary: 'normal' or 'attack'
df['Label'] = df['Label'].apply(lambda x: 'normal' if 'Normal' in str(x) else 'attack')

# Load saved objects from training on dataset_30000.csv
label_encoder = joblib.load('/content/label_encoder_friday.pkl')
scaler = joblib.load('/content/scaler_friday.pkl')
ml_model = joblib.load('/content/ml_model_friday.pkl')

# Encode the label column
df['Label'] = label_encoder.transform(df['Label'])

# Prepare features
X_test = df.drop(['Label'], axis=1)
X_test_scaled = scaler.transform(X_test)

# 🎯 Select 3 random samples
sample_df = df.sample(n=3, random_state=42)
sample_indices = sample_df.index
sample_X_scaled = X_test_scaled[sample_indices]

# Predict
sample_preds = ml_model.predict(sample_X_scaled)
sample_attack_names = original_attacks[sample_indices]  # ✅ Show true attack type

# 🖨️ Display predictions
print("\n🔍 ML Predictions on 3 Random Samples (Friday Testing Dataset):")
for i, (pred, attack_name) in enumerate(zip(sample_preds, sample_attack_names)):
    status = "✅ Normal" if pred == 1 else " Attack Detected"
    print(f"Sample {i+1}: {status} | Attack Type: {attack_name}")



🔍 ML Predictions on 3 Random Samples (Friday Testing Dataset):
Sample 1:  Attack Detected | Attack Type: Portscan
Sample 2:  Attack Detected | Attack Type: Portscan
Sample 3:  Attack Detected | Attack Type: Botnet - Attempted


**DEEP LEARNING**

In [None]:
import pandas as pd
import joblib
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Load the Friday dataset
df = pd.read_csv('/content/dataset_30000.csv')  # Update path if needed

# Keep a copy of the original attack labels
df['attack_type'] = df['Label']

# Drop unnecessary string columns
drop_cols = ['Flow ID', 'Source IP', 'Destination IP', 'Timestamp']
df.drop(columns=[col for col in drop_cols if col in df.columns], inplace=True, errors='ignore')

# Convert label to binary: normal vs attack
df['Label'] = df['Label'].apply(lambda x: 'normal' if 'Normal' in str(x) else 'attack')

# Encode label: normal=1, attack=0
label_encoder = LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Label'])

# Features and label
X = df.drop(['Label', 'attack_type'], axis=1)
y = df['Label']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build deep learning model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate on test data
loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"🧠 DL Model Test Accuracy: {accuracy * 100:.2f}%")

# Save model and encoders
model.save('dl_model_friday.h5')
joblib.dump(scaler, 'scaler_friday.pkl')
joblib.dump(label_encoder, 'label_encoder_friday.pkl')




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9110 - loss: nan - val_accuracy: 1.0000 - val_loss: nan
Epoch 2/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: nan - val_accuracy: 1.0000 - val_loss: nan
Epoch 3/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 1.0000 - loss: nan - val_accuracy: 1.0000 - val_loss: nan
Epoch 4/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: nan - val_accuracy: 1.0000 - val_loss: nan
Epoch 5/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 1.0000 - loss: nan - val_accuracy: 1.0000 - val_loss: nan
Epoch 6/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 1.0000 - loss: nan - val_accuracy: 1.0000 - val_loss: nan
Epoch 7/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



🧠 DL Model Test Accuracy: 100.00%


['label_encoder_friday.pkl']

In [None]:
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# Load the Friday testing dataset
df = pd.read_csv('/content/testing_fri.csv')  # Update path if needed

# Save original attack type names
original_attacks = df['Label'].values.copy()

# Drop unnecessary columns
drop_cols = ['Flow ID', 'Source IP', 'Destination IP', 'Timestamp']
df.drop(columns=[col for col in drop_cols if col in df.columns], inplace=True, errors='ignore')

# Convert label to 'normal' or 'attack'
df['Label'] = df['Label'].apply(lambda x: 'normal' if 'Normal' in str(x) else 'attack')

# Load pre-trained model and encoders from training on dataset_30000.csv
label_encoder = joblib.load('label_encoder_friday.pkl')
scaler = joblib.load('scaler_friday.pkl')
dl_model = load_model('dl_model_friday.h5')

# Encode labels
df['Label'] = label_encoder.transform(df['Label'])

# Prepare test features
X_test = df.drop(['Label'], axis=1)
X_test_scaled = scaler.transform(X_test)

# Select 10 random samples for testing
sample_df = df.sample(n=10)
sample_indices = sample_df.index
sample_X_scaled = X_test_scaled[sample_indices]
sample_attack_names = original_attacks[sample_indices]

# Predict using DL model
sample_preds_probs = dl_model.predict(sample_X_scaled)
sample_preds = (sample_preds_probs > 0.5).astype(int).flatten()

# 🖨️ Display predictions
print("\n🔍 DL Predictions on 10 Random Samples (Friday Dataset):")
for i, (pred, attack_name) in enumerate(zip(sample_preds, sample_attack_names)):
    status = "Normal" if pred == 1 else "Attack Detected"
    print(f"Sample {i+5}: {status} | Attack Type: {attack_name}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step

🔍 DL Predictions on 10 Random Samples (Friday Dataset):
Sample 5: Attack Detected | Attack Type: Portscan
Sample 6: Attack Detected | Attack Type: BENIGN
Sample 7: Attack Detected | Attack Type: BENIGN
Sample 8: Attack Detected | Attack Type: BENIGN
Sample 9: Attack Detected | Attack Type: BENIGN
Sample 10: Attack Detected | Attack Type: BENIGN
Sample 11: Attack Detected | Attack Type: BENIGN
Sample 12: Attack Detected | Attack Type: BENIGN
Sample 13: Attack Detected | Attack Type: BENIGN
Sample 14: Attack Detected | Attack Type: BENIGN
