In [20]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, LayerNormalization, AveragePooling1D, GlobalAveragePooling1D
from sklearn.preprocessing import StandardScaler, LabelEncoder,RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [24]:
class EarlyNetworkIDSModel:
    def __init__(self, max_header_size=50, max_payload_size=400):
     
        self.max_header_size = max_header_size
        self.max_payload_size = max_payload_size
        self.model = None
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        
    def preprocess_features(self, dataframe):
   
        dataframe.replace([np.inf, -np.inf], np.nan, inplace=True)
        dataframe.dropna(inplace=True)
        

        feature_columns = dataframe.columns[:-1]
        
      
        normalized_features = self.scaler.fit_transform(dataframe[feature_columns])
        
        labels = dataframe[' Label'].values
        labels = self.label_encoder.fit_transform(labels)
        
        return normalized_features, labels
    
    def prepare_dataset(self, features, labels, segmentation_rate):
      
        segmented_flows = []
        segmented_labels = []
        
        n_samples = features.shape[0]
        
        for i in range(n_samples):
            flow = features[i]
            label = labels[i]
            T = flow.shape[0]
            

            segment_size = max(1, int(segmentation_rate * T))

            num_segments = max(1, int((T - 1) / segment_size))
            
       
            segmented_flows.append(flow)
            segmented_labels.append(label)
            
        return np.array(segmented_flows), np.array(segmented_labels)
    
    def build_model(self, input_shape, num_classes):
   
        self.model = Sequential([
            Conv1D(
                filters=32,
                kernel_size=1,
                padding='valid',
                activation='relu',
                use_bias=True,
                input_shape=input_shape
            ),
            LayerNormalization(),
            AveragePooling1D(pool_size=2, padding='same'),
            GlobalAveragePooling1D(),
            Dense(64, activation='relu'),
            Dense(num_classes, activation='softmax')
        ])
        self.model.summary()
        
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=['accuracy']
        )
        
        return self.model
    
    def train(self, X_train, y_train, X_val, y_val, num_classes, segmentation_rate, batch_size=32, epochs=50):
       
        class FlowSequence(tf.keras.utils.Sequence):
            def __init__(self, X, y, batch_size):
                self.X = X
                self.y = y
                self.batch_size = batch_size

            def __len__(self):
                return int(np.ceil(len(self.X) / self.batch_size))

            def __getitem__(self, idx):
                start_idx = idx * self.batch_size
                end_idx = min((idx + 1) * self.batch_size, len(self.X))
                batch_x = self.X[start_idx:end_idx]
                batch_y = self.y[start_idx:end_idx]

                feature_dim = batch_x[0].shape[-1]
                max_length = max(len(x) for x in batch_x)
                padded_batch = np.zeros((len(batch_x), max_length, feature_dim))

                for i, x in enumerate(batch_x):
                    if len(x.shape) == 1:
                        x = x.reshape(1, -1)
                    padded_batch[i, :len(x)] = x

                return padded_batch, batch_y

        y_train_categorical = tf.keras.utils.to_categorical(y_train, num_classes)
        y_val_categorical = tf.keras.utils.to_categorical(y_val, num_classes)

      
        class_weights = compute_class_weight(
            class_weight='balanced',
            classes=np.unique(y_train),
            y=y_train
        )
        class_weights = dict(enumerate(class_weights))

       
        train_sequence = FlowSequence(X_train, y_train_categorical, batch_size)
        val_sequence = FlowSequence(X_val, y_val_categorical, batch_size)

        history = self.model.fit(
            train_sequence,
            epochs=epochs,
            validation_data=val_sequence,
            class_weight=class_weights
        )

        return history

    def evaluate(self, X_val, y_val, segmentation_rate):
       
        predictions = []
        

        batch_size = 32
        for i in range(0, len(X_val), batch_size):
            batch_x = X_val[i:i+batch_size]
            feature_dim = batch_x[0].shape[-1]
            max_length = max(len(x) for x in batch_x)
            padded_batch = np.zeros((len(batch_x), max_length, feature_dim))
            
            for j, x in enumerate(batch_x):
                if len(x.shape) == 1:
                    x = x.reshape(1, -1)
                padded_batch[j, :len(x)] = x
            
            batch_pred = self.model.predict(padded_batch)
            predictions.extend(batch_pred)
            
        y_pred = np.argmax(predictions, axis=1)
        

        num_classes = len(np.unique(y_val))
        metrics = self._calculate_metrics(y_val, y_pred, X_val, segmentation_rate, num_classes)
        
        return metrics
    
    def _calculate_metrics(self, y_true, y_pred, X_val, segmentation_rate, num_classes):
    
        precision = np.zeros(num_classes)
        recall = np.zeros(num_classes)
        fpr = np.zeros(num_classes)
        
        for i in range(num_classes):
            tp = np.sum((y_true == i) & (y_pred == i))
            fp = np.sum((y_true != i) & (y_pred == i))
            fn = np.sum((y_true == i) & (y_pred != i))
            tn = np.sum((y_true != i) & (y_pred != i))
            
            precision[i] = tp / (tp + fp) if (tp + fp) > 0 else 0
            recall[i] = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr[i] = fp / (fp + tn) if (fp + tn) > 0 else 0
        
        balanced_accuracy = np.mean(recall)
        bm = np.mean([recall[i] + (1 - fpr[i]) - 1 for i in range(num_classes)])
        
        earliness_metrics = self._calculate_earliness(X_val, y_true, y_pred, segmentation_rate)
        
        return {
            "precision": precision.tolist(),
            "recall": recall.tolist(),
            "false_positive_rate": fpr.tolist(),
            "balanced_accuracy": balanced_accuracy,
            "bookmaker_informedness": bm,
            "average_earliness": earliness_metrics
        }
    
    def _calculate_earliness(self, X_val, y_true, y_pred, segmentation_rate):
      
        total_earliness = 0
        count = 0
        
        for i, x in enumerate(X_val):
            if y_true[i] == y_pred[i]:
                sequence_length = len(x)
                position = int(sequence_length * segmentation_rate)
                earliness = 1 - (position / sequence_length)
                total_earliness += earliness
                count += 1
        
        return total_earliness / count if count > 0 else 0

In [25]:
def create_and_train_model(data_path, segmentation_rate=0.25, **kwargs):
    
  
    model = EarlyNetworkIDSModel(**kwargs)
    
  
    dataframe = pd.read_csv(data_path)
    features, labels = model.preprocess_features(dataframe)
    
  
    X, y = model.prepare_dataset(features, labels, segmentation_rate)
    
   
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    

    input_shape = (None, features.shape[-1])
    num_classes = len(np.unique(y))
    model.build_model(input_shape, num_classes)
    
 
    history = model.train(X_train, y_train, X_val, y_val, num_classes, segmentation_rate)
    
   
    metrics = model.evaluate(X_val, y_val, segmentation_rate)
    
    return model, history, metrics

In [26]:

model, history, metrics = create_and_train_model(
    data_path="Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv",
    segmentation_rate=0.25,
    max_header_size=50,
    max_payload_size=400
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m  32/4256[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25s[0m 6ms/step - accuracy: 0.8053 - loss: 1.4607 

  self._warn_if_super_not_called()


[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 7ms/step - accuracy: 0.5189 - loss: 1.5076 - val_accuracy: 0.8840 - val_loss: 1.1249
Epoch 2/50
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 8ms/step - accuracy: 0.9447 - loss: 1.0992 - val_accuracy: 0.9644 - val_loss: 0.3807
Epoch 3/50
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 8ms/step - accuracy: 0.9133 - loss: 0.9422 - val_accuracy: 0.9750 - val_loss: 0.1836
Epoch 4/50
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 6ms/step - accuracy: 0.9081 - loss: 0.6494 - val_accuracy: 0.9319 - val_loss: 0.2016
Epoch 5/50
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 7ms/step - accuracy: 0.9181 - loss: 0.5989 - val_accuracy: 0.9211 - val_loss: 0.2330
Epoch 6/50
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 7ms/step - accuracy: 0.9011 - loss: 0.6588 - val_accuracy: 0.9489 - val_loss: 0.1593
Epoch 7/50
[1m4256/4

In [27]:
import pickle
import json
import os
from datetime import datetime

class IDSModelHandler:
    @staticmethod
    def save_model(model, history, metrics, base_path="saved_models"):
       
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        model_dir = os.path.join(base_path, f"model_{timestamp}")
        
     
        os.makedirs(model_dir, exist_ok=True)
        
       
        model_path = os.path.join(model_dir, "model.pkl")
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        
     
        history_dict = history.history if hasattr(history, 'history') else history
        history_path = os.path.join(model_dir, "history.json")
        with open(history_path, 'w') as f:
            json.dump(history_dict, f)
        
       
        metrics_path = os.path.join(model_dir, "metrics.json")
        with open(metrics_path, 'w') as f:
            json.dump(metrics, f)
        
      
        metadata = {
            "timestamp": timestamp,
            "model_path": model_path,
            "history_path": history_path,
            "metrics_path": metrics_path
        }
        metadata_path = os.path.join(model_dir, "metadata.json")
        with open(metadata_path, 'w') as f:
            json.dump(metadata, f)
        
        return model_dir

    @staticmethod
    def load_model(model_dir):
       
        metadata_path = os.path.join(model_dir, "metadata.json")
        with open(metadata_path, 'r') as f:
            metadata = json.load(f)
        
   
        with open(metadata['model_path'], 'rb') as f:
            model = pickle.load(f)
        
      
        with open(metadata['history_path'], 'r') as f:
            history = json.load(f)
        
     
        with open(metadata['metrics_path'], 'r') as f:
            metrics = json.load(f)
        
        return model, history, metrics




In [28]:
handler = IDSModelHandler()
saved_model_dir = handler.save_model(model, history, metrics)
print(f"Model saved to: {saved_model_dir}")

Model saved to: saved_models\model_20241222_085307
