In [1]:
# ✅ Step 1: Import Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, Flatten, Dense, Dropout

# ✅ Step 2: Load Preprocessed Data
print("📥 Loading preprocessed dataset...")
df = pd.read_csv("network_data.csv") 
print(f"✅ Dataset loaded! Shape: {df.shape}")

📥 Loading preprocessed dataset...
✅ Dataset loaded! Shape: (2435692, 122)


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

print("\u2705 Step 1: Libraries imported successfully!")

# Step 2: Extract Features and Target
features = [
    "duration", "packets_count", "fwd_packets_count", "bwd_packets_count",
    "total_payload_bytes", "fwd_total_payload_bytes", "bwd_total_payload_bytes",
    "payload_bytes_max", "payload_bytes_min", "payload_bytes_mean",
    "bytes_rate", "packets_rate",
    "fwd_total_header_bytes", "bwd_total_header_bytes",
    "avg_segment_size", "fwd_avg_segment_size", "bwd_avg_segment_size",
    "fwd_init_win_bytes", "bwd_init_win_bytes",
    "active_mean", "idle_mean", "down_up_rate"
]
target = "label"

print("\u2705 Step 2: Features and target selected.")

# Load dataset (Ensure df is defined before running this script)
X = df[features].copy()
y = df[target]

# Step 3: Handle Missing Values
X.fillna(X.mean(), inplace=True)
print("\u2705 Step 3: Missing values handled.")

# Step 4: Convert Target to Numeric Labels
le = LabelEncoder()
y = le.fit_transform(y)
print("\u2705 Step 4: Target labels encoded.")

# Step 5: Normalize Features
scaler = StandardScaler()
X = scaler.fit_transform(X)
print("\u2705 Step 5: Features normalized.")

# Step 6: Split Data into Train & Test Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(f"\u2705 Step 6: Data split completed - Train: {X_train.shape}, Test: {X_test.shape}")

# Step 7: Build Optimized Model
print("\ud83d\ude80 Step 7: Building model...")
model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(len(np.unique(y)), activation='softmax')
])
print("\u2705 Step 7: Model built successfully!")

# Step 8: Compile Model with Learning Rate Scheduling
lr_schedule = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, min_lr=1e-6)
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
optimizer = keras.optimizers.Adam(learning_rate=0.001)

model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print("\u2705 Step 8: Model compiled.")

# Step 9: Train Model with Early Stopping
print("\ud83d\ude80 Step 9: Training model...")
history = model.fit(
    X_train, y_train, epochs=50, batch_size=64,
    validation_data=(X_test, y_test), callbacks=[lr_schedule, early_stopping]
)
print("\u2705 Step 9: Model training completed!")

# Step 10: Evaluate Model
print("\ud83d\ude80 Step 10: Evaluating model...")
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\u2705 Step 10: Test Accuracy: {test_acc:.4f}")

# Step 11: Save Model & Preprocessing Steps
model.save("network_packet_classifier_v2.h5")
print("\u2705 Step 11: Model saved as 'network_packet_classifier_v2.h5'")

# Step 12: Save Label Encoder & Scaler
import joblib
joblib.dump(le, "label_encoder.pkl")
joblib.dump(scaler, "scaler.pkl")
print("\u2705 Step 12: Preprocessing objects saved.")

print("\ud83c\udf89 Training complete! \ud83d\ude80")


In [19]:
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the trained model and preprocessing tools
print("✅ Loading model and preprocessing tools...")
model = tf.keras.models.load_model("network_packet_classifier_v2.h5")
scaler = joblib.load("scaler.pkl")
accuracy=0.983672
label_encoder = joblib.load("label_encoder.pkl")
print("✅ Model and preprocessing tools loaded!")

# Load the full dataset (Ensure this file exists or replace with actual DataFrame)


# Define feature columns (Ensure they match those used during training)
features = [
    "duration", "packets_count", "fwd_packets_count", "bwd_packets_count",
    "total_payload_bytes", "fwd_total_payload_bytes", "bwd_total_payload_bytes",
    "payload_bytes_max", "payload_bytes_min", "payload_bytes_mean",
    "bytes_rate", "packets_rate",
    "fwd_total_header_bytes", "bwd_total_header_bytes",
    "avg_segment_size", "fwd_avg_segment_size", "bwd_avg_segment_size",
    "fwd_init_win_bytes", "bwd_init_win_bytes",
    "active_mean", "idle_mean", "down_up_rate"
]
target = "label"

# Separate features and labels
X = df[features]
y = df[target]

# Partition the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(f"✅ Data split completed: Train -> {X_train.shape}, Test -> {X_test.shape}")

# Scale test features
X_test_scaled = scaler.transform(X_test)

# Predict labels
print("🔄 Making predictions on test data...")
y_pred_prob = model.predict(X_test_scaled)
y_pred_classes = np.argmax(y_pred_prob, axis=1)  # Convert probabilities to class labels

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_pred_classes)
print(f"🎯 Test Accuracy: {accuracy}")




✅ Loading model and preprocessing tools...
✅ Model and preprocessing tools loaded!
✅ Data split completed: Train -> (1948553, 22), Test -> (487139, 22)
🔄 Making predictions on test data...
[1m15224/15224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 972us/step
🎯 Test Accuracy: 0.983672
