# Layer by Layer approach for bulding a MLP (Multi-Layer Perseptron) Model

In [49]:
import numpy as np
from tqdm import tqdm

class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    def forward(self, input):
        pass

    def backward(self, output_gradient, learning_rate):
        pass

class Dense(Layer):
    def __init__(self, output_size):
        """
        Initialize the Dense layer with given output size.

        Parameters:
        output_size (int): Number of neurons in this layer.
        """
        self.output_size = output_size
        self.weights = None
        self.bias = None

    def initialize(self, input_size):
        """
        Initialize weights and biases based on the input size.

        Parameters:
        input_size (int): Size of the input to this layer.
        """
        self.weights = np.random.randn(self.output_size, input_size) 
        self.bias = np.zeros((self.output_size, 1))

    def forward(self, input):
        """
        Perform the forward pass through the dense layer.

        Parameters:
        input (np.ndarray): Input data to the layer (shape: [input_size, 1]).

        Returns:
        np.ndarray: Output data from the layer (shape: [output_size, 1]).
        """
        self.input = input
        if self.weights is None:
            self.initialize(input.shape[0])
        self.output = np.dot(self.weights, self.input) + self.bias
        return self.output

    def backward(self, output_gradient, learning_rate):
        """
        Perform the backward pass through the dense layer and update its parameters.

        Parameters:
        output_gradient (np.ndarray): Gradient of the loss function with respect to the layer's output.
        learning_rate (float): Learning rate for updating the weights and biases.

        Returns:
        np.ndarray: Gradient of the loss function with respect to the input data.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return np.dot(self.weights.T, output_gradient)

class Activation(Layer):
    def __init__(self, activation, activation_prime):
        """
        Initialize the Activation layer with a specific activation function and its derivative.

        Parameters:
        activation (callable): Activation function to be applied.
        activation_prime (callable): Derivative of the activation function.
        """
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """
        Perform the forward pass through the activation function.

        Parameters:
        input (np.ndarray): Input data to the activation function.

        Returns:
        np.ndarray: Output data after applying the activation function.
        """
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        """
        Perform the backward pass through the activation function.

        Parameters:
        output_gradient (np.ndarray): Gradient of the loss function with respect to the activation function's output.
        learning_rate (float): Learning rate (not used in this method, but included for consistency).

        Returns:
        np.ndarray: Gradient of the loss function with respect to the input data.
        """
        return np.multiply(output_gradient, self.activation_prime(self.input))

class Sigmoid(Activation):
    def __init__(self):
        """
        Initialize the Sigmoid activation function and its derivative.
        """
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        def sigmoid_prime(x):
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)

class ReLU(Activation):
    def __init__(self):
        """
        Initialize the Sigmoid activation function and its derivative.
        """
        def ReLU(x):
            return np.maximum(0, x)

        def ReLU_prime(x):
            return 1 if x > 0 else 0

        super().__init__(ReLU, ReLU_prime)


class Loss:
    @staticmethod
    def mse(y_true, y_pred):
        """
        Calculate the Mean Squared Error (MSE) loss.

        Parameters:
        y_true (np.ndarray): True label.
        y_pred (np.ndarray): Predicted label.

        Returns:
        float: MSE loss value.
        """
        return np.mean(np.power(y_true - y_pred, 2))

    @staticmethod
    def mse_derivative(y_true, y_pred):
        """
        Calculate the derivative of the Mean Squared Error (MSE) loss function.

        Parameters:
        y_true (np.ndarray): True label.
        y_pred (np.ndarray): Predicted label.

        Returns:
        np.ndarray: Gradient of MSE loss with respect to prediction.
        """
        return 2 * (y_pred - y_true) / np.size(y_true)

class CometNet:
    def __init__(self, layers, input_size):
        """
        Initialize the CometNet class for training and predicting with a neural network.

        Parameters:
        layers (list): List of layers in the network (each layer is an instance of a Layer subclass).
        input_size (int): Size of the input features.
        """
        self.layers = layers
        self.input_size = input_size

    def predict(self, input):
        """
        Perform a forward pass through the network to make predictions.

        Parameters:
        input (np.ndarray): Input data to the network (shape: [input_size, 1]).

        Returns:
        np.ndarray: Output prediction from the network (shape: [output_size, 1]).
        """
        output = input
        for layer in self.layers:
            output = layer.forward(output)
        return output

    def train(self, X, y, epochs=10, learning_rate=0.1):
        """
        Train the neural network using the provided training data.

        Parameters:
        X (np.ndarray): Training input data.
        y (np.ndarray): Training target data.
        epochs (int): Number of training epochs (default: 10).
        learning_rate (float): Learning rate for updating the parameters (default: 0.1).
        """
        for epoch in range(epochs):
            error = 0
            for x, y_true in tqdm(zip(X, y), total=len(X), desc=f"Epoch {epoch + 1}/{epochs}"):
                x = x.reshape(-1, 1)
                y_true = np.array([[y_true]], dtype=float)
                
                output = self.predict(x)
                error += Loss.mse(y_true, output)
                
                grad = Loss.mse_derivative(y_true, output)
                for layer in reversed(self.layers):
                    grad = layer.backward(grad, learning_rate)
            
            error /= len(X)
            print(f"Epoch {epoch + 1}/{epochs} - Error: {error:.6f}")


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.utils import resample

class DataPreprocessor:
    """
    A class for preprocessing network traffic data.
    """

    def __init__(self, datetime_col='Timestamp', label_col='Label', random_state=42):
        """
        Initialize the DataPreprocessor.

        Args:
            datetime_col (str): Name of the datetime column.
            label_col (str): Name of the label column.
            random_state (int): Random seed for reproducibility.
        """
        self.datetime_col = datetime_col
        self.label_col = label_col
        self.random_state = random_state
        self.label_encoder = LabelEncoder()
        self.scaler = MinMaxScaler()
        self.imputer = SimpleImputer(strategy='mean')

    def read_and_combine_data(self, file_paths):
        """
        Read CSV files and combine them into a single DataFrame.

        Args:
            file_paths (list): List of paths to the CSV files.

        Returns:
            pd.DataFrame: Combined DataFrame.
        """
        df_list = [pd.read_csv(file_path, encoding='latin1') for file_path in file_paths]
        df = pd.concat(df_list).reset_index(drop=True)
        df.columns = df.columns.str.strip().str.replace(' ', '_')
        return df

    def preprocess_data(self, df):
        """
        Preprocess the data by encoding labels, handling timestamps, and dropping unnecessary columns.

        Args:
            df (pd.DataFrame): Input DataFrame.

        Returns:
            pd.DataFrame: Preprocessed DataFrame.
        """
        # Encode labels
        df[self.label_col] = self.label_encoder.fit_transform(df[self.label_col])
        df[self.label_col] = df[self.label_col].apply(lambda x: 0 if x == 0 else 1)

        # Handle timestamp
        if self.datetime_col in df.columns:
            df[self.datetime_col] = pd.to_datetime(df[self.datetime_col], errors='coerce')
            df.dropna(subset=[self.datetime_col], inplace=True)
            df['minutes_from_midnight'] = (df[self.datetime_col].dt.hour * 60 +
                                           df[self.datetime_col].dt.minute +
                                           df[self.datetime_col].dt.second / 60 +
                                           df[self.datetime_col].dt.microsecond / 60000000)
            df.drop(columns=[self.datetime_col], inplace=True)

        # Drop unnecessary columns
        columns_to_drop = ['Flow_ID', 'Source_IP', 'Destination_IP']
        df.drop(columns=[col for col in columns_to_drop if col in df.columns], inplace=True)

        return df

    def resample_data(self, df, proportions):
        """
        Resample the data to achieve desired class proportions.

        Args:
            df (pd.DataFrame): Input DataFrame.
            proportions (list): Desired proportions for each label.

        Returns:
            pd.DataFrame: Resampled DataFrame.
        """
        df_majority = df[df[self.label_col] == 0]
        df_minority = df[df[self.label_col] == 1]

        n_samples_majority = int(len(df) * proportions[0])
        n_samples_minority = int(len(df) * proportions[1])

        df_majority_resampled = resample(df_majority, replace=False, n_samples=n_samples_majority, random_state=self.random_state)
        df_minority_resampled = resample(df_minority, replace=True, n_samples=n_samples_minority, random_state=self.random_state)

        return pd.concat([df_majority_resampled, df_minority_resampled])

    def select_features(self, df):
        """
        Select features based on a predefined list.

        Args:
            df (pd.DataFrame): Input DataFrame.

        Returns:
            pd.DataFrame: DataFrame with selected features.
        """
        selected_features = [
            'Source_Port', 'Destination_Port', 'Protocol', 'Flow_Duration', 'Fwd_Packet_Length_Max',
            'Fwd_Packet_Length_Min', 'Fwd_Packet_Length_Mean', 'Fwd_Packet_Length_Std',
            'Bwd_Packet_Length_Max', 'Bwd_Packet_Length_Min', 'Bwd_Packet_Length_Mean',
            'Bwd_Packet_Length_Std', 'Flow_IAT_Mean', 'Flow_IAT_Std', 'Flow_IAT_Max',
            'Fwd_IAT_Total', 'Fwd_IAT_Mean', 'Fwd_IAT_Std', 'Fwd_IAT_Max', 'Bwd_IAT_Std',
            'Bwd_IAT_Max', 'Fwd_PSH_Flags', 'Bwd_Packets/s', 'Min_Packet_Length',
            'Max_Packet_Length', 'Packet_Length_Mean', 'Packet_Length_Std',
            'Packet_Length_Variance', 'FIN_Flag_Count', 'SYN_Flag_Count', 'ACK_Flag_Count',
            'URG_Flag_Count', 'Down/Up_Ratio', 'Average_Packet_Size', 'Avg_Fwd_Segment_Size',
            'Avg_Bwd_Segment_Size', 'Init_Win_bytes_forward', 'Init_Win_bytes_backward',
            'Idle_Mean', 'Idle_Max', 'Idle_Min', 'minutes_from_midnight'
        ]
        return df[selected_features + [self.label_col]]

    def scale_and_impute(self, X_train, X_test):
        """
        Scale features and impute missing values.

        Args:
            X_train (np.array): Training feature set.
            X_test (np.array): Test feature set.

        Returns:
            tuple: Scaled and imputed training and test sets.
        """
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        X_train_imputed = self.imputer.fit_transform(X_train_scaled)
        X_test_imputed = self.imputer.transform(X_test_scaled)

        return X_train_imputed, X_test_imputed

    def process_data(self, file_paths, proportions, verbose=False):
        """
        Process the data through all preprocessing steps.

        Args:
            file_paths (list): List of paths to the CSV files.
            proportions (list): Desired proportions for each label.
            verbose (bool): Whether to print additional information.

        Returns:
            tuple: Processed training and test sets (X_train, X_test, y_train, y_test).
        """
        df = self.read_and_combine_data(file_paths)
        df = self.preprocess_data(df)
        df_resampled = self.resample_data(df, proportions)
        df_filtered = self.select_features(df_resampled)

        y = df_filtered[self.label_col]
        X = df_filtered.drop(columns=[self.label_col])

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.random_state)
        X_train_imputed, X_test_imputed = self.scale_and_impute(X_train, X_test)

        if verbose:
            print("Final shapes:")
            print("X_train shape:", X_train_imputed.shape)
            print("X_test shape:", X_test_imputed.shape)
            print("y_train shape:", y_train.shape)
            print("y_test shape:", y_test.shape)

        return X_train_imputed, X_test_imputed, y_train, y_test

In [43]:
# Define file paths
train_file_paths = [
    r'GeneratedLabelledFlows (1)\Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
    r'GeneratedLabelledFlows (1)\Monday-WorkingHours.pcap_ISCX.csv',
    r'GeneratedLabelledFlows (1)\Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv'
]
test_file_paths = [
    r'GeneratedLabelledFlows (1)\Wednesday-workingHours.pcap_ISCX.csv'
]

# Define desired proportions for each label
proportions = [0.5, 0.5]

# Initialize DataPreprocessor
preprocessor = DataPreprocessor(datetime_col='Timestamp', label_col='Label', random_state=42)

# Process training data
print("Processing training data:")
X_train, X_test, y_train, y_test = preprocessor.process_data(train_file_paths, proportions, verbose=False)

# Process test data
print("\nProcessing test data:")
X_train_test, X_test_test, y_train_test, y_test_test = preprocessor.process_data(test_file_paths, proportions, verbose=True)

Processing training data:


  df_list = [pd.read_csv(file_path, encoding='latin1') for file_path in file_paths]



Processing test data:
Final shapes:
X_train shape: (554161, 42)
X_test shape: (138541, 42)
y_train shape: (554161,)
y_test shape: (138541,)


In [44]:
X_train[1].shape

(42,)

In [53]:
X_train = np.array(X_train)  # Ensure X_train is a numpy array
y_train = np.array(y_train)  # Ensure y_train is a numpy array
# Define the network architecture
input_size = X_train[0].shape  # User-defined input size
network = [
    Dense(128),
    ReLU(),
    Dense(250),
    Sigmoid(),
    Dense(150),
    Sigmoid(),
    Dense(1),
    Sigmoid()
]

# Create the model
model = CometNet(network, input_size)

# Train the network
model.train(X_train, y_train, epochs=8, learning_rate=0.01)



Epoch 1/8: 100%|██████████| 316888/316888 [11:17<00:00, 468.01it/s] 


Epoch 1/8 - Error: 0.038216


Epoch 2/8:  38%|███▊      | 118974/316888 [03:30<05:50, 565.28it/s]


KeyboardInterrupt: 

In [51]:
X_test = np.array(X_test)
y_test = np.array(y_test)

X_test = X_test.reshape(input_size[0], -1)
y_test = y_test.reshape(y_test.shape[0], -1)

# test the model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)
y_pred_classes = y_pred_classes.T
print(y_pred.T.shape)
print(y_test.shape)
print(y_pred_classes.shape)
y_pred_classes = y_pred_classes.reshape(y_pred_classes.shape[0], -1)


# # check confution matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred_classes)
print(cm)

#classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_classes))


(79222, 1)
(79222, 1)
(79222, 1)
[[35659  4049]
 [35694  3820]]
              precision    recall  f1-score   support

           0       0.50      0.90      0.64     39708
           1       0.49      0.10      0.16     39514

    accuracy                           0.50     79222
   macro avg       0.49      0.50      0.40     79222
weighted avg       0.49      0.50      0.40     79222

