In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (classification_report, confusion_matrix, roc_auc_score, roc_curve, auc, accuracy_score,precision_recall_curve, f1_score)
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pickle
import os
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')



In [8]:

class BiLSTMSequenceProcessor:
    """Turn the packets of a pcap capture into fixed-length per-flow feature sequences.

    Packets are grouped into bidirectional flows keyed on the two
    (ip, port, protocol) endpoints. Each flow with enough packets becomes one
    ``(sequence_length, N_FEATURES)`` array, zero-padded at the end or
    truncated to its first ``sequence_length`` packets.
    """

    # Width of one per-packet feature vector. The slot layout produced by
    # `_extract_sequence_features` is tied to this value; do not change one
    # without the other.
    N_FEATURES = 13

    # Flows with fewer packets than this are discarded.
    MIN_PACKETS_PER_FLOW = 3

    def __init__(self, sequence_length=50):
        """Store the target sequence length.

        Args:
            sequence_length: Number of packets (timesteps) per output sequence.
        """
        self.sequence_length = sequence_length
        # Not used by any method of this class; kept so existing code that
        # reads `processor.scaler` keeps working.
        self.scaler = MinMaxScaler()

    def create_sequences_from_pcap(self, pcap_file, label, max_flows=1000, packets_per_flow=50):
        """Read a pcap file and build labelled per-flow feature sequences.

        Args:
            pcap_file: Path of the capture, handed to scapy's ``PcapReader``.
            label: Class label attached to every sequence from this file.
            max_flows: Stop converting flows once this many sequences exist.
            packets_per_flow: Accepted for backward compatibility but ignored;
                the per-flow length is ``self.sequence_length``.

        Returns:
            ``(sequences, labels)`` as numpy arrays. ``sequences`` has shape
            ``(n, sequence_length, N_FEATURES)``. If reading fails, or no flow
            qualifies, both arrays are empty with shape ``(0,)``; a failure is
            reported with a printed message rather than an exception.
        """
        from scapy.all import PcapReader, IP, TCP, UDP

        print(f"\nProcessing {pcap_file} into sequences...")

        # Only the first `sequence_length` packets of a flow are ever used, so
        # later ones are neither feature-extracted nor stored. The floor of
        # MIN_PACKETS_PER_FLOW keeps the "enough packets" test below meaningful
        # for very small sequence lengths.
        keep_per_flow = max(self.sequence_length, self.MIN_PACKETS_PER_FLOW)

        flow_dict = {}
        sequences = []
        labels = []

        try:
            with PcapReader(pcap_file) as pcap:
                for packet in tqdm(pcap, desc="   Reading packets"):
                    if IP not in packet:
                        continue

                    ip_layer = packet[IP]
                    if TCP in packet:
                        src_port, dst_port = packet[TCP].sport, packet[TCP].dport
                    elif UDP in packet:
                        src_port, dst_port = packet[UDP].sport, packet[UDP].dport
                    else:
                        src_port, dst_port = 0, 0

                    # Sorting the two endpoints makes the key direction-agnostic,
                    # so both directions of a conversation share one flow.
                    flow_key = tuple(sorted([
                        (ip_layer.src, src_port, ip_layer.proto),
                        (ip_layer.dst, dst_port, ip_layer.proto)
                    ]))

                    flow_packets = flow_dict.setdefault(flow_key, [])
                    if len(flow_packets) < keep_per_flow:
                        flow_packets.append(self._extract_sequence_features(packet))

            # Convert flows to sequences (dict order = order of first appearance)
            print(f"\n   Converting {len(flow_dict)} flows to sequences...")
            for flow_key, packets in tqdm(flow_dict.items(), desc="   Creating sequences"):
                if len(packets) < self.MIN_PACKETS_PER_FLOW:
                    continue

                sequences.append(self._pad_or_truncate(packets))
                labels.append(label)

                if len(sequences) >= max_flows:
                    break

            print(f"   Created {len(sequences)} sequences")
            return np.array(sequences), np.array(labels)

        except Exception as e:
            # Deliberate best-effort: report the problem and return empty data.
            print(f"   Error: {e}")
            return np.array([]), np.array([])

    def _pad_or_truncate(self, packets):
        """Return a ``(sequence_length, N_FEATURES)`` float array for one flow.

        The first ``sequence_length`` feature vectors are copied in order; any
        remaining rows stay zero.
        """
        window = np.zeros((self.sequence_length, self.N_FEATURES), dtype=float)
        used = packets[:self.sequence_length]
        if used:
            window[:len(used)] = used
        return window

    def _extract_sequence_features(self, packet):
        """Build the per-packet feature vector (a list of ``N_FEATURES`` floats).

        Returns an all-zero vector for non-IP packets or when any field access
        raises.

        NOTE(review): the slot layout is not aligned across protocols, and the
        final truncation drops data. It is kept exactly as-is because changing
        it would invalidate already-trained models and scalers:
          * TCP   -> 4 IP + 7 TCP + 2 pad = 13 slots before the ICMP/DNS flags
                     are appended, so both flags are truncated away.
          * UDP   -> 4 IP + 3 UDP + 5 pad = 12 slots, so only the ICMP flag
                     survives (index 12), the DNS flag is truncated, and the
                     UDP payload length sits at index 6 where TCP packets
                     carry TCP flags.
          * other -> 4 IP + 10 pad = 14 slots, so both flags are truncated.
        """
        from scapy.all import IP, TCP, UDP, ICMP, DNS

        zero_vector = [0.0] * self.N_FEATURES

        try:
            if IP not in packet:
                return zero_vector

            ip_layer = packet[IP]
            features = [
                len(packet) / 1500.0,             # packet length, scaled by 1500
                ip_layer.ttl / 255.0,             # IP TTL
                int(ip_layer.flags) / 7.0,        # IP flags
                (ip_layer.id % 1000) / 1000.0,    # IP ID (variation indicator)
            ]

            if TCP in packet:
                tcp_layer = packet[TCP]
                features.extend([
                    tcp_layer.sport / 65535.0,
                    tcp_layer.dport / 65535.0,
                    int(tcp_layer.flags) / 63.0,
                    tcp_layer.window / 65535.0,
                    len(tcp_layer.payload) / 1500.0,
                    float(len(tcp_layer.options)),  # option count, not scaled
                    self._payload_entropy(bytes(tcp_layer.payload)),
                ])
                features.extend([0.0, 0.0])  # Padding for UDP

            elif UDP in packet:
                udp_layer = packet[UDP]
                features.extend([
                    udp_layer.sport / 65535.0,
                    udp_layer.dport / 65535.0,
                    len(udp_layer.payload) / 1500.0,
                ])
                features.extend([0.0, 0.0, 0.0, 0.0, 0.0])  # Padding for TCP

            else:
                features.extend([0.0] * 10)

            # ICMP / DNS presence indicators (see NOTE in the docstring: these
            # are mostly removed by the truncation below).
            features.append(1.0 if ICMP in packet else 0.0)
            features.append(1.0 if DNS in packet else 0.0)

        except Exception:
            # Malformed packet or missing field: fall back to zeros, but let
            # KeyboardInterrupt / SystemExit propagate.
            return zero_vector

        return features[:self.N_FEATURES]  # Ensure exactly N_FEATURES features

    def _payload_entropy(self, payload):
        """Shannon entropy of the payload bytes in bits, divided by 8 and capped at 1.0.

        Returns 0.0 for an empty payload.
        """
        if not payload:
            return 0.0

        from scipy.stats import entropy
        # frombuffer views the bytes directly instead of building a Python list.
        byte_counts = np.bincount(np.frombuffer(payload, dtype=np.uint8), minlength=256)
        probabilities = byte_counts[byte_counts > 0] / len(payload)
        ent = entropy(probabilities, base=2) / 8.0  # 8 bits is the maximum per byte
        return min(ent, 1.0)




In [13]:
class BiLSTMClassifier:
    """End-to-end pipeline for a binary BiLSTM classifier over packet sequences.

    Typical call order: `load_sequences` -> `preprocess_sequences` ->
    `build_bilstm_model` -> `train_model` -> `evaluate_model` ->
    `plot_results` -> `save_model`. Label 0 is benign, label 1 is
    steganography.
    """

    def __init__(self):
        self.model = None        # Keras model, set by build_bilstm_model
        self.history = None      # Keras History, set by train_model
        self.scaler_x = StandardScaler()  # fitted in preprocess_sequences
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def load_sequences(self, benign_pcap, stego_pcap, max_flows_per_class=1000,
                       sequence_length=50):
        """Build labelled sequences from one benign and one steganographic pcap.

        Args:
            benign_pcap: Path of the capture labelled 0.
            stego_pcap: Path of the capture labelled 1.
            max_flows_per_class: Upper bound on sequences taken from each file.
            sequence_length: Packets per sequence.

        Returns:
            ``(X, y, processor)``: the concatenated sequences, their labels and
            the `BiLSTMSequenceProcessor` that produced them.

        Raises:
            ValueError: If either capture yields no sequences.
        """
        print(" LOADING PACKET SEQUENCES")

        processor = BiLSTMSequenceProcessor(sequence_length=sequence_length)

        # Process benign traffic
        X_benign, y_benign = processor.create_sequences_from_pcap(
            benign_pcap, label=0, max_flows=max_flows_per_class
        )

        # Process steganographic traffic
        X_stego, y_stego = processor.create_sequences_from_pcap(
            stego_pcap, label=1, max_flows=max_flows_per_class
        )

        # An empty class would otherwise surface as an opaque shape mismatch
        # in np.concatenate (or much later, during splitting/evaluation).
        empty_sources = [
            path for path, seqs in ((benign_pcap, X_benign), (stego_pcap, X_stego))
            if len(seqs) == 0
        ]
        if empty_sources:
            raise ValueError(
                "No sequences could be created from: " + ", ".join(map(str, empty_sources))
            )

        # Combine datasets
        X = np.concatenate([X_benign, X_stego], axis=0)
        y = np.concatenate([y_benign, y_stego], axis=0)

        print(f"\n✓ Total sequences: {len(X)}")
        print(f"  - Benign: {len(X_benign)}")
        print(f"  - Steganography: {len(X_stego)}")
        print(f"  - Sequence length: {sequence_length}")
        print(f"  - Features per packet: {X.shape[-1]}")

        return X, y, processor

    def preprocess_sequences(self, X, y, test_size=0.2, random_state=42):
        """Split into train/test sets and standardise the features.

        The split is stratified on ``y``. The scaler is fitted on the training
        packets only (all timesteps flattened into rows) and then applied to
        both sets. Results are stored on ``self.X_train``, ``self.X_test``,
        ``self.y_train`` and ``self.y_test``.

        Args:
            X: Array of shape ``(n_sequences, seq_len, n_features)``.
            y: Labels aligned with ``X``.
            test_size: Fraction of the data held out for testing.
            random_state: Seed for the split.
        """
        print(" PREPROCESSING SEQUENCES")

        print(f"\nSplitting sequences...")

        # Train-test split
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=y
        )

        print(f"   Train: {len(self.X_train)} sequences")
        print(f"   Test:  {len(self.X_test)} sequences")

        print(f"\nNormalizing sequences...")
        train_shape, test_shape = self.X_train.shape, self.X_test.shape
        n_features = train_shape[-1]

        # StandardScaler works on 2-D input, so every packet becomes one row.
        # NOTE(review): zero-padded timesteps are included in the statistics.
        train_scaled = self.scaler_x.fit_transform(self.X_train.reshape(-1, n_features))
        test_scaled = self.scaler_x.transform(self.X_test.reshape(-1, n_features))

        self.X_train = train_scaled.reshape(train_shape)
        self.X_test = test_scaled.reshape(test_shape)

        print(f"   Sequences normalized")
        print(f"   Shape: {self.X_train.shape}")

    def build_bilstm_model(self, sequence_length=50, n_features=13):
        """Build and compile the three-layer BiLSTM network into ``self.model``.

        Args:
            sequence_length: Timesteps per input sequence.
            n_features: Features per timestep.
        """
        print("\n" + "="*70)
        print(" BUILDING BiLSTM MODEL")
        print("="*70)

        print(f"\nArchitecture:")
        print(f"   Input shape: ({sequence_length}, {n_features})")

        self.model = models.Sequential([
            layers.Input(shape=(sequence_length, n_features)),

            # First BiLSTM block
            layers.Bidirectional(
                layers.LSTM(128, return_sequences=True, dropout=0.2),
                name='bilstm_1'
            ),
            layers.Dropout(0.3, name='dropout_1'),

            # Second BiLSTM block
            layers.Bidirectional(
                layers.LSTM(64, return_sequences=True, dropout=0.2),
                name='bilstm_2'
            ),
            layers.Dropout(0.2, name='dropout_2'),

            # Third BiLSTM block collapses the sequence to a single vector
            layers.Bidirectional(
                layers.LSTM(32, return_sequences=False, dropout=0.2),
                name='bilstm_3'
            ),
            layers.Dropout(0.2, name='dropout_3'),

            # Dense layers
            layers.Dense(64, activation='relu', name='dense_1'),
            layers.BatchNormalization(name='bn_1'),
            layers.Dropout(0.2, name='dropout_4'),

            layers.Dense(32, activation='relu', name='dense_2'),
            layers.Dropout(0.1, name='dropout_5'),

            # Output: probability of the positive (steganography) class
            layers.Dense(1, activation='sigmoid', name='output')
        ])

        print("   Layers:")
        print("   ├─ BiLSTM: 128 (return_seq=True, dropout=0.2) → Dropout(0.3)")
        print("   ├─ BiLSTM: 64 (return_seq=True, dropout=0.2) → Dropout(0.2)")
        print("   ├─ BiLSTM: 32 (return_seq=False, dropout=0.2) → Dropout(0.2)")
        print("   ├─ Dense: 64 → ReLU → BatchNorm → Dropout(0.2)")
        print("   ├─ Dense: 32 → ReLU → Dropout(0.1)")
        print("   └─ Output: 1 → Sigmoid")

        # Compile
        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.001),
            loss='binary_crossentropy',
            metrics=[
                'accuracy',
                keras.metrics.AUC(name='auc'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall')
            ]
        )

        print("\nModel compiled")
        print("   Optimizer: Adam (lr=0.001)")
        print("   Loss: Binary Crossentropy")

    def train_model(self, epochs=100, batch_size=32, validation_split=0.2):
        """Fit ``self.model`` on the training data and store the history.

        Training uses early stopping (patience 20, best weights restored) and
        learning-rate halving on plateau (patience 10), both on ``val_loss``.

        Args:
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.
            validation_split: Fraction of the training data used for validation.
        """
        print(" TRAINING BiLSTM MODEL")

        print(f"\nParameters:")
        print(f"   Epochs: {epochs}")
        print(f"   Batch size: {batch_size}")
        print(f"   Validation split: {validation_split*100:.0f}%")

        callbacks = [
            keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=20,
                restore_best_weights=True,
                verbose=1
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=10,
                min_lr=1e-6,
                verbose=1
            )
        ]

        print(f"\nStarting training...\n")

        self.history = self.model.fit(
            self.X_train, self.y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=callbacks,
            verbose=1
        )

        print("\n✓ Training completed!")

    def evaluate_model(self):
        """Score the model on the held-out test set and print the metrics.

        Returns:
            ``(y_pred, y_pred_proba, cm)``: hard predictions at a 0.5
            threshold (1-D), the raw model output as returned by
            ``model.predict``, and the 2x2 confusion matrix.
        """
        print(" MODEL EVALUATION")

        print("\nGenerating predictions...")
        y_pred_proba = self.model.predict(self.X_test, verbose=0)
        # sklearn metrics expect 1-D scores; the model emits one column.
        y_score = np.ravel(y_pred_proba)
        y_pred = (y_score > 0.5).astype(int)

        # Metrics
        accuracy = accuracy_score(self.y_test, y_pred)
        auc_score = roc_auc_score(self.y_test, y_score)
        f1 = f1_score(self.y_test, y_pred)

        print(f"\nTest Metrics:")
        print(f"   Accuracy:  {accuracy:.4f} ({accuracy*100:.2f}%)")
        print(f"   AUC-ROC:   {auc_score:.4f}")
        print(f"   F1-Score:  {f1:.4f}")

        class_labels = [0, 1]

        print(f"\nClassification Report:")
        print(classification_report(
            self.y_test, y_pred,
            labels=class_labels,
            target_names=['Benign', 'Steganography']
        ))

        # Pinning the labels guarantees a 2x2 matrix, so the indexing below is
        # valid even if a class is absent from the predictions or test labels.
        cm = confusion_matrix(self.y_test, y_pred, labels=class_labels)
        print(f"\nConfusion Matrix:")
        print(f"   True Negatives:  {cm[0,0]:>6,}")
        print(f"   False Positives: {cm[0,1]:>6,}")
        print(f"   False Negatives: {cm[1,0]:>6,}")
        print(f"   True Positives:  {cm[1,1]:>6,}")

        return y_pred, y_pred_proba, cm

    def _plot_history_panel(self, ax, metric, title, ylabel):
        """Draw the train and validation curves of one history metric on ``ax``."""
        ax.plot(self.history.history[metric], label='Train', linewidth=2)
        ax.plot(self.history.history[f'val_{metric}'], label='Validation', linewidth=2)
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(True, alpha=0.3)

    def plot_results(self, y_pred, y_pred_proba, cm, output_dir='results'):
        """Save a 2x3 figure of training curves and test-set diagnostics.

        Panels: accuracy, loss and AUC per epoch; confusion matrix; ROC curve;
        precision-recall curve. The figure is written to
        ``<output_dir>/bilstm_results.png``.

        Args:
            y_pred: Hard predictions (accepted for interface compatibility;
                not used by the plots).
            y_pred_proba: Predicted probabilities for the test set.
            cm: Confusion matrix to render.
            output_dir: Directory for the image; created if missing.
        """
        print("\nGenerating visualizations...")

        os.makedirs(output_dir, exist_ok=True)

        y_score = np.ravel(y_pred_proba)
        fig = plt.figure(figsize=(16, 12))

        # Training history
        self._plot_history_panel(fig.add_subplot(2, 3, 1), 'accuracy', 'Accuracy', 'Accuracy')
        self._plot_history_panel(fig.add_subplot(2, 3, 2), 'loss', 'Loss', 'Loss')
        self._plot_history_panel(fig.add_subplot(2, 3, 3), 'auc', 'AUC-ROC', 'AUC')

        # Confusion matrix
        ax_cm = fig.add_subplot(2, 3, 4)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True, ax=ax_cm,
                    xticklabels=['Benign', 'Stego'],
                    yticklabels=['Benign', 'Stego'])
        ax_cm.set_title('Confusion Matrix', fontsize=12, fontweight='bold')

        # ROC curve
        ax_roc = fig.add_subplot(2, 3, 5)
        fpr, tpr, _ = roc_curve(self.y_test, y_score)
        roc_auc = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, label=f'ROC (AUC = {roc_auc:.4f})', linewidth=2)
        ax_roc.plot([0, 1], [0, 1], 'k--', linewidth=1)  # chance diagonal
        ax_roc.set_title('ROC Curve', fontsize=12, fontweight='bold')
        ax_roc.set_xlabel('False Positive Rate')
        ax_roc.set_ylabel('True Positive Rate')
        ax_roc.legend()
        ax_roc.grid(True, alpha=0.3)

        # PR curve
        ax_pr = fig.add_subplot(2, 3, 6)
        precision, recall, _ = precision_recall_curve(self.y_test, y_score)
        pr_auc = auc(recall, precision)
        ax_pr.plot(recall, precision, label=f'PR (AUC = {pr_auc:.4f})', linewidth=2)
        ax_pr.set_title('Precision-Recall', fontsize=12, fontweight='bold')
        ax_pr.set_xlabel('Recall')
        ax_pr.set_ylabel('Precision')
        ax_pr.legend()
        ax_pr.grid(True, alpha=0.3)

        fig.tight_layout()

        plot_path = os.path.join(output_dir, 'bilstm_results.png')
        fig.savefig(plot_path, dpi=300, bbox_inches='tight')
        print(f" Saved to: {plot_path}")
        plt.close(fig)

    def _input_dims(self):
        """Return ``(sequence_length, n_features)`` for the metadata record.

        Prefers the model's own ``input_shape``, then the shape of the training
        data, and finally falls back to ``(50, 13)``.
        """
        try:
            input_shape = getattr(self.model, 'input_shape', None)
        except (AttributeError, ValueError, RuntimeError):
            input_shape = None
        if input_shape is not None and len(input_shape) == 3 and None not in tuple(input_shape)[1:]:
            return int(input_shape[1]), int(input_shape[2])

        x_train = getattr(self, 'X_train', None)
        if x_train is not None and getattr(x_train, 'ndim', 0) == 3:
            return int(x_train.shape[1]), int(x_train.shape[2])

        return 50, 13

    def save_model(self, output_dir='models'):
        """Write the model, fitted scaler, text summary and metadata to disk.

        All four files share one timestamp suffix so they can be matched up
        later. The metadata pickle records the sequence length and feature
        count the model was actually built or trained with.

        Args:
            output_dir: Destination directory; created if missing.
        """
        os.makedirs(output_dir, exist_ok=True)

        now = datetime.now()
        timestamp = now.strftime("%Y%m%d_%H%M%S")

        print(" SAVING MODEL AND ARTIFACTS")

        # Save model
        model_path = os.path.join(output_dir, f'bilstm_stego_{timestamp}.h5')
        self.model.save(model_path)
        print(f"\n Model saved: {model_path}")

        # Save scaler: inference must apply the same standardisation as training
        scaler_path = os.path.join(output_dir, f'scaler_{timestamp}.pkl')
        with open(scaler_path, 'wb') as f:
            pickle.dump(self.scaler_x, f)
        print(f"✓ Scaler saved: {scaler_path}")

        # Save model summary
        summary_path = os.path.join(output_dir, f'model_summary_{timestamp}.txt')
        with open(summary_path, 'w', encoding='utf-8') as f:
            self.model.summary(print_fn=lambda x: f.write(x + '\n'))
        print(f" Model summary saved: {summary_path}")

        # Save metadata
        sequence_length, n_features = self._input_dims()
        metadata = {
            'timestamp': timestamp,
            'sequence_length': sequence_length,
            'n_features': n_features,
            'model_path': model_path,
            'scaler_path': scaler_path,
            'training_date': now.isoformat()
        }

        metadata_path = os.path.join(output_dir, f'metadata_{timestamp}.pkl')
        with open(metadata_path, 'wb') as f:
            pickle.dump(metadata, f)
        print(f"✓ Metadata saved: {metadata_path}")

        print(f" FILES SAVED IN {output_dir}")
        print(f"\n Files created:")
        print(f"   1. {os.path.basename(model_path)}")
        print(f"   2. {os.path.basename(scaler_path)}")
        print(f"   3. {os.path.basename(summary_path)}")
        print(f"   4. {os.path.basename(metadata_path)}")
        print(f"\n Use these for inference:")
        print(f"   model_path = '{model_path}'")
        print(f"   scaler_path = '{scaler_path}'")
        

In [14]:

def main(benign_pcap="Dataset/Benign_Dump.pcap",
         stego_pcap="Dataset/steganography_dataset_20251016_233034.pcap",
         max_flows_per_class=500,
         sequence_length=50,
         epochs=10,
         batch_size=32,
         validation_split=0.2):
    """Run the full pipeline: load, preprocess, build, train, evaluate, plot, save.

    Args:
        benign_pcap: Capture whose flows are labelled benign.
        stego_pcap: Capture whose flows are labelled steganography.
        max_flows_per_class: Cap on sequences per class (reduced for memory).
        sequence_length: Packets per sequence.
        epochs: Maximum training epochs.
        batch_size: Training mini-batch size.
        validation_split: Fraction of training data used for validation.
    """
    classifier = BiLSTMClassifier()

    # The processor is returned as well but is not needed here.
    X, y, _processor = classifier.load_sequences(
        benign_pcap, stego_pcap,
        max_flows_per_class=max_flows_per_class,
        sequence_length=sequence_length
    )

    classifier.preprocess_sequences(X, y)

    # Size the network from the preprocessed data so the model input can never
    # drift from what load_sequences actually produced.
    _, seq_len, n_features = classifier.X_train.shape
    classifier.build_bilstm_model(
        sequence_length=seq_len,
        n_features=n_features
    )

    classifier.train_model(
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split
    )

    y_pred, y_pred_proba, cm = classifier.evaluate_model()

    classifier.plot_results(y_pred, y_pred_proba, cm)

    # Save
    classifier.save_model()

    print("BiLSTM Training Complete!")


if __name__ == "__main__":
    main()

 LOADING PACKET SEQUENCES

Processing Dataset/Benign_Dump.pcap into sequences...


   Reading packets: 86000it [00:20, 4119.03it/s]



   Converting 38444 flows to sequences...


   Creating sequences:  49%|████▉     | 18752/38444 [00:00<00:00, 506573.33it/s]


   Created 500 sequences

Processing Dataset/steganography_dataset_20251016_233034.pcap into sequences...


   Reading packets: 92067it [00:35, 2564.79it/s]



   Converting 81985 flows to sequences...


   Creating sequences:  14%|█▎        | 11187/81985 [00:00<00:00, 360190.67it/s]


   Created 500 sequences

✓ Total sequences: 1000
  - Benign: 500
  - Steganography: 500
  - Sequence length: 50
  - Features per packet: 13
 PREPROCESSING SEQUENCES

Splitting sequences...
   Train: 800 sequences
   Test:  200 sequences

Normalizing sequences...
   Sequences normalized
   Shape: (800, 50, 13)

 BUILDING BiLSTM MODEL

Architecture:
   Input shape: (50, 13)
   Layers:
   ├─ BiLSTM: 128 (return_seq=True, dropout=0.2) → Dropout(0.3)
   ├─ BiLSTM: 64 (return_seq=True, dropout=0.2) → Dropout(0.2)
   ├─ BiLSTM: 32 (return_seq=False, dropout=0.2) → Dropout(0.2)
   ├─ Dense: 64 → ReLU → BatchNorm → Dropout(0.2)
   ├─ Dense: 32 → ReLU → Dropout(0.1)
   └─ Output: 1 → Sigmoid

Model compiled
   Optimizer: Adam (lr=0.001)
   Loss: Binary Crossentropy
 TRAINING BiLSTM MODEL

Parameters:
   Epochs: 10
   Batch size: 32
   Validation split: 20%

Starting training...

Epoch 1/10
20/20 ━━━━━━━━━━━━━━━━━━━━ 2:03 6s/step - accuracy: 0.5000 - auc: 0.5263 - loss: 0.7209 - precision: 0.333




Test Metrics:
   Accuracy:  0.9900 (99.00%)
   AUC-ROC:   1.0000
   F1-Score:  0.9901

Classification Report:
               precision    recall  f1-score   support

       Benign       1.00      0.98      0.99       100
Steganography       0.98      1.00      0.99       100

     accuracy                           0.99       200
    macro avg       0.99      0.99      0.99       200
 weighted avg       0.99      0.99      0.99       200


Confusion Matrix:
   True Negatives:      98
   False Positives:      2
   False Negatives:      0
   True Positives:     100

Generating visualizations...
 Saved to: results\bilstm_results.png
 SAVING MODEL AND ARTIFACTS





 Model saved: models\bilstm_stego_20251017_000558.h5
✓ Scaler saved: models\scaler_20251017_000558.pkl


 Model summary saved: models\model_summary_20251017_000558.txt
✓ Metadata saved: models\metadata_20251017_000558.pkl
 FILES SAVED IN ./models/

 Files created:
   1. bilstm_stego_20251017_000558.h5
   2. scaler_20251017_000558.pkl
   3. model_summary_20251017_000558.txt
   4. metadata_20251017_000558.pkl

 Use these for inference:
   model_path = 'models\bilstm_stego_20251017_000558.h5'
   scaler_path = 'models\scaler_20251017_000558.pkl'
BiLSTM Training Complete!
