In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import gc
# Recursively print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/very-downsampled-csecicids/downsampled_original_ratio.csv
/kaggle/input/very-downsampled-csecicids/downsampled_custom_ratio.csv


#### Imports

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import seaborn as sns
import matplotlib.pyplot as plt
from typing import List, Dict, Any, Union, Optional, Tuple

#### Preprocess

In [3]:
class DataPreprocessor:
    """One-hot encodes the feature frame and the target labels as dense arrays.

    Two independent ``OneHotEncoder`` instances are kept: ``feature_encoder``
    (categories unseen during fitting are ignored at transform time) and
    ``target_encoder`` (unseen labels raise, which is the encoder's default).

    NOTE(review): every column of ``X`` is handed to the one-hot encoder, so a
    numeric column yields one output column per distinct value -- confirm that
    this is intended for the dataset in use.
    """

    def __init__(self):
        self.feature_encoder = self._dense_one_hot(handle_unknown='ignore')
        self.target_encoder = self._dense_one_hot()

    @staticmethod
    def _dense_one_hot(**kwargs):
        """Build a OneHotEncoder that returns dense arrays on old and new scikit-learn."""
        try:
            # scikit-learn >= 1.2 spells the option ``sparse_output``.
            return OneHotEncoder(sparse_output=False, **kwargs)
        except TypeError:
            # Older releases only know the original ``sparse`` keyword.
            return OneHotEncoder(sparse=False, **kwargs)

    @staticmethod
    def _as_column(y) -> np.ndarray:
        """Reshape a 1-D label container into the (n_samples, 1) layout the encoder expects."""
        return np.asarray(y).reshape(-1, 1)

    def fit_transform(self, X: pd.DataFrame, y: pd.Series) -> tuple:
        """Fit both encoders and return ``(X_encoded, y_encoded)``.

        Args:
            X: Feature frame; every column is one-hot encoded.
            y: Target labels, one per row of ``X``.

        Returns:
            Tuple of the encoded features and the encoded targets.
        """
        X_encoded = self.feature_encoder.fit_transform(X)
        y_encoded = self.target_encoder.fit_transform(self._as_column(y))
        return X_encoded, y_encoded

    def transform(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> Union[np.ndarray, tuple]:
        """Encode data with the already fitted encoders.

        Args:
            X: Feature frame to encode.
            y: Optional target labels; encoded as well when given.

        Returns:
            ``X_encoded`` alone when ``y`` is None, otherwise ``(X_encoded, y_encoded)``.
        """
        X_encoded = self.feature_encoder.transform(X)
        if y is None:
            return X_encoded
        return X_encoded, self.target_encoder.transform(self._as_column(y))

#### Multi-head attention

In [4]:
class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product attention layer.

    Queries, keys and values are each projected to ``d_model`` units, split into
    ``num_heads`` heads of ``d_model // num_heads`` units, attended per head,
    merged back together and passed through a final dense projection.
    """

    def __init__(self, d_model: int, num_heads: int):
        super().__init__()
        # The model width must divide evenly across the heads.
        assert d_model % num_heads == 0

        self.num_heads = num_heads
        self.d_model = d_model
        self.depth = d_model // num_heads

        # Input projections for queries, keys and values.
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)

        # Output projection applied to the merged heads.
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x: tf.Tensor, batch_size: int) -> tf.Tensor:
        """Reshape the last axis into (num_heads, depth) and move the head axis to position 1."""
        return tf.transpose(
            tf.reshape(x, (batch_size, -1, self.num_heads, self.depth)),
            perm=[0, 2, 1, 3],
        )

    def call(self, v: tf.Tensor, k: tf.Tensor, q: tf.Tensor, mask: Optional[tf.Tensor] = None) -> tf.Tensor:
        """Attend from ``q`` over ``k``/``v`` and return the projected result.

        Note the argument order: values first, then keys, then queries.
        """
        n_batch = tf.shape(q)[0]

        # Project first (q, k, v order), then split every projection into heads.
        heads_q, heads_k, heads_v = [
            self.split_heads(projected, n_batch)
            for projected in (self.wq(q), self.wk(k), self.wv(v))
        ]

        per_head = self.scaled_dot_product_attention(heads_q, heads_k, heads_v, mask)

        # Undo the head split: bring the head axis back next to depth and flatten both.
        merged = tf.reshape(
            tf.transpose(per_head, perm=[0, 2, 1, 3]),
            (n_batch, -1, self.d_model),
        )
        return self.dense(merged)

    def scaled_dot_product_attention(self, q: tf.Tensor, k: tf.Tensor, v: tf.Tensor, 
                                   mask: Optional[tf.Tensor] = None) -> tf.Tensor:
        """Return softmax(q @ k^T / sqrt(d_k)) @ v, with an optional additive mask."""
        key_depth = tf.cast(tf.shape(k)[-1], tf.float32)
        logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(key_depth)

        if mask is not None:
            # Positions where mask == 1 get a large negative logit, i.e. ~0 weight after softmax.
            logits = logits + (mask * -1e9)

        weights = tf.nn.softmax(logits, axis=-1)
        return tf.matmul(weights, v)

#### Transformer block

In [5]:
class TransformerBlock(tf.keras.layers.Layer):
    """Encoder block: self-attention and a feed-forward network, each followed by
    dropout, a residual connection and layer normalization."""

    def __init__(self, d_model: int, num_heads: int, dff: int, dropout_rate: float = 0.1):
        super().__init__()

        layers = tf.keras.layers

        self.mha = MultiHeadAttention(d_model, num_heads)
        # Position-wise feed-forward network: expand to dff units, project back to d_model.
        self.ffn = tf.keras.Sequential([
            layers.Dense(dff, activation='relu'),
            layers.Dense(d_model),
        ])

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, x: tf.Tensor, training: bool = False, mask: Optional[tf.Tensor] = None) -> tf.Tensor:
        """Run the block on ``x``; dropout is only active when ``training`` is true."""
        # Self-attention sub-layer: x serves as values, keys and queries.
        attended = self.dropout1(self.mha(x, x, x, mask), training=training)
        normed = self.layernorm1(x + attended)

        # Feed-forward sub-layer with its own residual connection.
        transformed = self.dropout2(self.ffn(normed), training=training)
        return self.layernorm2(normed + transformed)

#### Classifier

In [6]:
class TransformerClassifier(tf.keras.Model):
    """Transformer-encoder classifier over a sequence of feature tokens.

    Inputs are embedded to ``d_model`` units, combined with a sinusoidal
    positional encoding, passed through ``num_layers`` transformer blocks,
    mean-pooled over the sequence axis and classified with a softmax layer.

    Args:
        input_dim: Number of positions covered by the positional-encoding table.
        num_classes: Number of units in the softmax output layer.
        d_model: Width of the embedding and of every transformer block.
        num_layers: Number of stacked transformer blocks.
        num_heads: Attention heads per block.
        dff: Hidden width of each block's feed-forward network.
        dropout_rate: Dropout rate used after the embedding and inside the blocks.
    """

    def __init__(self, 
                 input_dim: int,
                 num_classes: int,
                 d_model: int = 128,
                 num_layers: int = 4,
                 num_heads: int = 8,
                 dff: int = 512,
                 dropout_rate: float = 0.1):
        super().__init__()
        
        self.d_model = d_model
        self.num_layers = num_layers
        
        self.embedding = tf.keras.layers.Dense(d_model)
        # Precomputed table of shape (1, input_dim, d_model).
        self.pos_encoding = self.positional_encoding(input_dim, d_model)
        
        self.transformer_blocks = [
            TransformerBlock(d_model, num_heads, dff, dropout_rate)
            for _ in range(num_layers)
        ]
        
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.final_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
    
    def positional_encoding(self, position: int, d_model: int) -> tf.Tensor:
        """Build a float32 sinusoidal encoding table of shape (1, position, d_model)."""
        angle_rads = self.get_angles(
            np.arange(position)[:, np.newaxis],
            np.arange(d_model)[np.newaxis, :],
            d_model
        )
        
        # Sine on even feature indices, cosine on odd ones.
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        
        pos_encoding = angle_rads[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)
    
    def get_angles(self, pos: np.ndarray, i: np.ndarray, d_model: int) -> np.ndarray:
        """Return pos / 10000^(2 * (i // 2) / d_model), broadcast over ``pos`` and ``i``."""
        angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
        return pos * angle_rates
    
    def call(self, x: tf.Tensor, training: bool = False) -> tf.Tensor:
        """Return class probabilities of shape (batch, num_classes).

        Args:
            x: Either (batch, seq_len, features) or a flat (batch, seq_len) matrix.
                A flat matrix is treated as a sequence of scalar tokens, one per column.
            training: Enables dropout when true.
        """
        # A rank-2 batch has no token axis. Embedding it directly would give
        # (batch, d_model), and adding the (1, seq, d_model) positional table to that
        # either fails to broadcast or mixes the batch axis with the position axis.
        # Give every column its own token so the sequence axis matches the table.
        if len(x.shape) == 2:
            x = tf.expand_dims(x, axis=-1)

        x = self.embedding(x)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :tf.shape(x)[1], :]
        
        x = self.dropout(x, training=training)
        
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x, training=training)
        
        # Global average pooling over the sequence axis
        x = tf.reduce_mean(x, axis=1)
        
        return self.final_layer(x)

#### Metrics

In [7]:
class ModelEvaluator:
    """Computes, plots and prints classification metrics for a trained model.

    Raw features and labels are encoded with ``preprocessor``, predictions come
    from ``model.predict``, and class names are read from the preprocessor's
    fitted target encoder.
    """

    def __init__(self, model: tf.keras.Model, preprocessor: DataPreprocessor):
        self.model = model
        self.preprocessor = preprocessor
        
    def evaluate(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, Any]:
        """
        Evaluate the model and return various metrics

        Returns a dict with the keys ``accuracy``, ``f1_micro``, ``f1_macro``,
        ``f1_weighted``, ``confusion_matrix`` and ``classification_report``
        (the latter in scikit-learn's ``output_dict`` form).
        """
        # Transform data
        X_encoded, y_encoded = self.preprocessor.transform(X, y)
        
        # Get predictions as class indices
        y_pred_proba = self.model.predict(X_encoded)
        y_pred = np.argmax(y_pred_proba, axis=1)
        y_true = np.argmax(y_encoded, axis=1)
        
        # Class names and the matching index for every class known to the encoder
        class_labels = self.preprocessor.target_encoder.categories_[0]
        label_ids = np.arange(len(class_labels))
        
        metrics = {}
        
        # Basic metrics
        metrics['accuracy'] = np.mean(y_pred == y_true)
        metrics['f1_micro'] = f1_score(y_true, y_pred, average='micro')
        metrics['f1_macro'] = f1_score(y_true, y_pred, average='macro')
        metrics['f1_weighted'] = f1_score(y_true, y_pred, average='weighted')
        
        # Pin the label set to the encoder's classes. Otherwise a class that is absent
        # from this split shrinks the matrix/report, which misaligns the heatmap tick
        # labels and makes classification_report reject ``target_names``.
        metrics['confusion_matrix'] = confusion_matrix(y_true, y_pred, labels=label_ids)
        
        # Classification report
        metrics['classification_report'] = classification_report(
            y_true, 
            y_pred, 
            labels=label_ids,
            target_names=class_labels,
            output_dict=True
        )
        
        return metrics
    
    def plot_confusion_matrix(self, X: pd.DataFrame, y: pd.Series, 
                            figsize: Tuple[int, int] = (10, 8),
                            ax: Optional[Any] = None,
                            title: str = 'Confusion Matrix') -> None:
        """
        Plot confusion matrix using seaborn

        Args:
            X: Raw feature frame.
            y: Raw target labels.
            figsize: Size of the figure created when ``ax`` is not given.
            ax: Optional matplotlib axes to draw on. When given, no new figure is
                created and ``plt.show()`` is left to the caller, so the plot can
                be placed in a subplot grid.
            title: Title written on the axes.
        """
        metrics = self.evaluate(X, y)
        cm = metrics['confusion_matrix']
        class_labels = self.preprocessor.target_encoder.categories_[0]
        
        owns_figure = ax is None
        if owns_figure:
            _, ax = plt.subplots(figsize=figsize)

        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=class_labels,
                   yticklabels=class_labels,
                   ax=ax)
        ax.set_title(title)
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')

        if owns_figure:
            plt.tight_layout()
            plt.show()
    
    def print_metrics(self, X: pd.DataFrame, y: pd.Series) -> None:
        """
        Print all metrics in a formatted way
        """
        metrics = self.evaluate(X, y)
        
        print("\n=== Model Evaluation Metrics ===\n")
        
        print("Overall Metrics:")
        print(f"Accuracy: {metrics['accuracy']:.4f}")
        print(f"F1 Score (Micro): {metrics['f1_micro']:.4f}")
        print(f"F1 Score (Macro): {metrics['f1_macro']:.4f}")
        print(f"F1 Score (Weighted): {metrics['f1_weighted']:.4f}")
        
        print("\nDetailed Classification Report:")
        report = metrics['classification_report']
        
        # Print metrics for each class
        for class_name in self.preprocessor.target_encoder.categories_[0]:
            if class_name in report:
                class_metrics = report[class_name]
                print(f"\nClass: {class_name}")
                print(f"Precision: {class_metrics['precision']:.4f}")
                print(f"Recall: {class_metrics['recall']:.4f}")
                print(f"F1-Score: {class_metrics['f1-score']:.4f}")
                print(f"Support: {class_metrics['support']}")


#### Model creation and training

In [8]:
def create_and_train_model(
    X: pd.DataFrame,
    y: pd.Series,
    model_params: Dict[str, Any] = None,
    training_params: Dict[str, Any] = None
) -> Tuple[TransformerClassifier, DataPreprocessor, ModelEvaluator]:
    """
    Fit a preprocessor on (X, y), then build, compile and train a TransformerClassifier.

    Args:
        X: Raw feature frame.
        y: Raw target labels.
        model_params: Optional overrides for d_model, num_layers, num_heads, dff
            and dropout_rate.
        training_params: Optional overrides for batch_size, epochs,
            validation_split and learning_rate.

    Returns:
        The trained model, the fitted preprocessor and an evaluator bound to both.
    """
    # Start from the defaults and let caller-supplied values win.
    model_cfg = dict(d_model=128, num_layers=4, num_heads=8, dff=512, dropout_rate=0.1)
    model_cfg.update(model_params or {})

    train_cfg = dict(batch_size=32, epochs=10, validation_split=0.2, learning_rate=0.001)
    train_cfg.update(training_params or {})

    # Encode features and targets; the encoded widths size the network.
    preprocessor = DataPreprocessor()
    features, targets = preprocessor.fit_transform(X, y)

    model = TransformerClassifier(
        input_dim=features.shape[1],
        num_classes=targets.shape[1],
        **model_cfg
    )

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=train_cfg['learning_rate']),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Only these three settings are forwarded to fit(); learning_rate went to the optimizer.
    fit_options = {name: train_cfg[name] for name in ('batch_size', 'epochs', 'validation_split')}
    model.fit(features, targets, **fit_options)

    return model, preprocessor, ModelEvaluator(model, preprocessor)

#### Main

In [None]:
# 1. Data Preparation
print("1. Preparing Data...")
df = pd.read_csv("/kaggle/input/very-downsampled-csecicids/downsampled_original_ratio.csv")
print(df.shape)
y = df['Attack']
X = df.drop(['Attack'], axis=1)

# The full frame is no longer needed once features and target are separated.
del df
gc.collect()

# 2. Train-Test Split
print("\n2. Splitting Data...")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

# 3. Model Configuration
print("\n3. Configuring Model...")
model_params = {
    'd_model': 128,        # Embedding width (same as the function default)
    'num_layers': 2,      # Fewer layers for faster training
    'num_heads': 2,
    'dff': 128,
    'dropout_rate': 0.3
}

training_params = {
    'batch_size': 32,
    'epochs': 10,
    'validation_split': 0.2,
    'learning_rate': 0.001
}

print("Model parameters:", model_params)
print("Training parameters:", training_params)

# 4. Model Training
print("\n4. Training Model...")
model, preprocessor, evaluator = create_and_train_model(
    X_train,
    y_train,
    model_params=model_params,
    training_params=training_params
)

# 5. Model Evaluation
print("\n5. Evaluating Model...")

# Print detailed metrics
print("\nTraining Set Metrics:")
evaluator.print_metrics(X_train, y_train)

print("\nTest Set Metrics:")
evaluator.print_metrics(X_test, y_test)

# Plot both confusion matrices side by side on explicit axes.
# evaluator.plot_confusion_matrix opens its own figure and calls plt.show(), so it
# cannot fill a subplot grid; draw the heatmaps here from evaluate()'s matrix instead.
class_labels = preprocessor.target_encoder.categories_[0]
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

splits = [
    ("Training Set Confusion Matrix", X_train, y_train),
    ("Test Set Confusion Matrix", X_test, y_test),
]
for ax, (title, split_X, split_y) in zip(axes, splits):
    cm = evaluator.evaluate(split_X, split_y)['confusion_matrix']
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels,
                yticklabels=class_labels,
                ax=ax)
    ax.set_title(title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

fig.tight_layout()
plt.show()

1. Preparing Data...
(911627, 45)

2. Splitting Data...
Training set size: 729301
Test set size: 182326

3. Configuring Model...
Model parameters: {'d_model': 128, 'num_layers': 2, 'num_heads': 2, 'dff': 128, 'dropout_rate': 0.3}
Training parameters: {'batch_size': 32, 'epochs': 10, 'validation_split': 0.2, 'learning_rate': 0.001}

4. Training Model...


