In [1]:
%run Pre-Processing.ipynb

In [2]:
%run Models.ipynb

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PowerTransformer
from imblearn.over_sampling import ADASYN
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.feature_selection import SelectKBest, f_classif
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier

In [11]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

class FactoredRBM(nn.Module):
    def __init__(self, n_visible, n_hidden, n_factors, learning_rate=0.01, batch_size=100, 
                 n_epochs=100, k=1, sigma=1.0):
        """
        Initialize the Factored RBM
        
        Args:
            n_visible (int): Number of visible units (features)
            n_hidden (int): Number of hidden units
            n_factors (int): Number of factors for the factored weights
            learning_rate (float): Learning rate for optimization
            batch_size (int): Size of mini-batches
            n_epochs (int): Number of training epochs
            k (int): Number of Gibbs sampling steps
            sigma (float): Standard deviation for Gaussian visible units
        """
        super(FactoredRBM, self).__init__()
        
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.n_factors = n_factors
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.k = k
        self.sigma = sigma
        
        # Initialize factored weights with smaller values
        self.W_v = nn.Parameter(torch.randn(n_visible, n_factors) * 0.001)
        self.W_h = nn.Parameter(torch.randn(n_factors, n_hidden) * 0.001)
        
        # Initialize biases
        self.visible_bias = nn.Parameter(torch.zeros(n_visible))
        self.hidden_bias = nn.Parameter(torch.zeros(n_hidden))
        
        # For tracking feature importance
        self.feature_importance = None
        
        # Initialize scaler
        self.scaler = StandardScaler()
    
    def free_energy(self, v):
        """Calculate the free energy of the visible units"""
        v_term = -torch.sum((v - self.visible_bias).pow(2) / (2 * self.sigma**2), dim=1)
        wx = torch.matmul(torch.matmul(v, self.W_v), self.W_h)
        # Add numerical stability
        hidden_term = torch.sum(torch.log1p(torch.exp(torch.clamp(wx + self.hidden_bias, -20, 20))), dim=1)
        return -hidden_term - v_term
    
    def sample_hidden(self, v):
        """Sample hidden units given visible units"""
        wx = torch.matmul(torch.matmul(v, self.W_v), self.W_h)
        activation = wx + self.hidden_bias
        # Clamp values for numerical stability
        activation = torch.clamp(activation, -20, 20)
        p_h = torch.sigmoid(activation)
        # Ensure probabilities are valid
        p_h = torch.clamp(p_h, 0, 1)
        return p_h, torch.bernoulli(p_h)
    
    def sample_visible(self, h):
        """Sample visible units given hidden units"""
        wx = torch.matmul(torch.matmul(h, self.W_h.t()), self.W_v.t())
        mean_v = wx + self.visible_bias
        # For continuous data, sample from Gaussian with learned mean
        sample_v = mean_v + torch.randn_like(mean_v) * self.sigma
        return mean_v, sample_v
    
    def gibbs_step(self, v):
        """Perform one step of Gibbs sampling"""
        p_h, h = self.sample_hidden(v)
        mean_v, v = self.sample_visible(h)
        return mean_v, v, p_h, h
    
    def contrastive_divergence(self, v_pos):
        """Perform k steps of contrastive divergence"""
        # Positive phase
        p_h_pos, h_pos = self.sample_hidden(v_pos)
        
        # Negative phase
        v_neg = v_pos.clone()
        for _ in range(self.k):
            _, v_neg, _, _ = self.gibbs_step(v_neg)
        
        p_h_neg, _ = self.sample_hidden(v_neg)
        
        return v_pos, v_neg, p_h_pos, p_h_neg
    
    def calculate_feature_importance(self):
        """Calculate feature importance scores"""
        W = torch.matmul(self.W_v, self.W_h)
        importance = torch.sum(W.pow(2), dim=1)
        self.feature_importance = importance.detach().numpy()
        return self.feature_importance
    
    def fit(self, data, y=None, verbose=True):
        """
        Train the Factored RBM
        
        Args:
            data: numpy array or torch tensor of training data
            y: labels (not used, included for sklearn compatibility)
            verbose: whether to print progress
        """
        # Scale the data
        if isinstance(data, np.ndarray):
            data = self.scaler.fit_transform(data)
            data = torch.FloatTensor(data)
        
        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
        
        n_batches = len(data) // self.batch_size
        
        for epoch in range(self.n_epochs):
            epoch_error = 0
            
            for i in range(n_batches):
                batch_start = i * self.batch_size
                batch_end = (i + 1) * self.batch_size
                v_pos = data[batch_start:batch_end]
                
                # Contrastive Divergence
                v_pos, v_neg, p_h_pos, p_h_neg = self.contrastive_divergence(v_pos)
                
                # Calculate gradients using free energy
                cost = torch.mean(self.free_energy(v_pos) - self.free_energy(v_neg))
                
                # Backward pass and optimization
                optimizer.zero_grad()
                cost.backward()
                
                # Clip gradients for stability
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
                
                optimizer.step()
                
                epoch_error += cost.item()
            
            if verbose and (epoch + 1) % 10 == 0:
                print(f'Epoch {epoch + 1}/{self.n_epochs}, Error: {epoch_error / n_batches:.4f}')
        
        # Calculate final feature importance
        self.calculate_feature_importance()
        return self
    
    def transform(self, X, y=None):
        """
        Transform data using the hidden layer representation
        
        Args:
            X: input data
            y: labels (not used, included for sklearn compatibility)
        """
        if isinstance(X, np.ndarray):
            X = self.scaler.transform(X)  # Use transform, not fit_transform
            X = torch.FloatTensor(X)
        
        with torch.no_grad():
            hidden_probs, _ = self.sample_hidden(X)
        return hidden_probs.numpy()
    
    def get_feature_ranking(self):
        """Get features ranked by importance"""
        if self.feature_importance is None:
            self.calculate_feature_importance()
        return np.argsort(-self.feature_importance)

In [12]:
SVM_f1 = dict()
KNN_f1 = dict()
RFC_f1 = dict()
LGBM_f1 = dict()
XGB_f1 = dict()
CB_f1 = dict()

In [13]:
for x in range(71, 10, -10):
    num_feature = x
    print("No of feature", num_feature)
    selector = FactoredRBM(
        n_visible=X_train_resampled.shape[1],
        n_hidden=num_feature,
        n_factors=9,
        learning_rate=0.1,
        batch_size=4096,
        n_epochs=100,
        k=3
    )
    selector.fit(X_train_resampled, y_train_resampled)
    X_train_selected = selector.transform(X_train_resampled, y_train_resampled)
    X_test_selected = selector.transform(X_test_transformed)
    SVM_f1[x] = SVM_selector(X_train_selected, y_train_resampled, X_test_selected, y_test)
    KNN_f1[x] = KNN_Classifier(X_train_selected, y_train_resampled, X_test_selected, y_test)
    RFC_f1[x] = RandomForest_Classifier(X_train_selected, y_train_resampled, X_test_selected, y_test)
    LGBM_f1[x] = LGBM_Classifier(X_train_selected, y_train_resampled, X_test_selected, y_test)
    XGB_f1[x] = XGB_Classifier(X_train_selected, y_train_resampled, X_test_selected, y_test)
    CB_f1[x] = CatBoost_Classifier(X_train_selected, y_train_resampled, X_test_selected, y_test)

No of feature 71
Epoch 10/100, Error: -2895165666606156.0000
Epoch 20/100, Error: -51378029036553408.0000
Epoch 30/100, Error: -268944151074643904.0000
Epoch 40/100, Error: -864054554953229056.0000
Epoch 50/100, Error: -2130279326040873216.0000
Epoch 60/100, Error: -4446230531840812544.0000
Epoch 70/100, Error: -8275814465349258240.0000
Epoch 80/100, Error: -14167887403639595008.0000
Epoch 90/100, Error: -22755235998812303360.0000
Epoch 100/100, Error: -34754990589867966464.0000
SVM Model:-
[LibSVM]



Making predictions...

Classification Report:
              precision    recall  f1-score   support

      Normal     0.0000    0.0000    0.0000    103560
      Attack     0.0675    1.0000    0.1265      7496

    accuracy                         0.0675    111056
   macro avg     0.0337    0.5000    0.0632    111056
weighted avg     0.0046    0.0675    0.0085    111056


Confusion Matrix:
[[     0 103560]
 [     0   7496]]
KNN model


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
              precision    recall  f1-score   support

      Normal     0.9325    1.0000    0.9651    103560
      Attack     0.0000    0.0000    0.0000      7496

    accuracy                         0.9325    111056
   macro avg     0.4663    0.5000    0.4825    111056
weighted avg     0.8696    0.9325    0.8999    111056

Confusion Matrix:
[[103560      0]
 [  7496      0]]
Random Forest:-


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   11.7s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  1.1min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.5s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:    0.6s finished


Classification Report:
              precision    recall  f1-score   support

      Normal     0.9908    0.6796    0.8063    103560
      Attack     0.1710    0.9132    0.2881      7496

    accuracy                         0.6954    111056
   macro avg     0.5809    0.7964    0.5472    111056
weighted avg     0.9355    0.6954    0.7713    111056

Confusion Matrix:
[[70384 33176]
 [  651  6845]]
LGBM Model:-
[LightGBM] [Info] Number of positive: 83069, number of negative: 414022
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.032172 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 213
[LightGBM] [Info] Number of data points in the train set: 497091, number of used features: 71
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.167110 -> initscore=-1.606248
[LightGBM] [Info] Start training from score -1.606248
Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

      Normal     0.9325    1.0000    0.9651    103560
      Attack     0.0000    0.0000    0.0000      7496

    accuracy                         0.9325    111056
   macro avg     0.4663    0.5000    0.4825    111056
weighted avg     0.8696    0.9325    0.8999    111056

Confusion Matrix:
[[103560      0]
 [  7496      0]]
XGB Model:-
Classification Report:
              precision    recall  f1-score   support

      Normal     0.9325    1.0000    0.9651    103560
      Attack     0.0000    0.0000    0.0000      7496

    accuracy                         0.9325    111056
   macro avg     0.4663    0.5000    0.4825    111056
weighted avg     0.8696    0.9325    0.8999    111056

Confusion Matrix:
[[103560      0]
 [  7496      0]]
CatBoost Model


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0:	learn: 0.5952799	total: 210ms	remaining: 3m 29s
100:	learn: 0.3377252	total: 7.29s	remaining: 1m 4s
200:	learn: 0.3377252	total: 13.9s	remaining: 55.4s
300:	learn: 0.3377252	total: 18s	remaining: 41.8s
400:	learn: 0.3377252	total: 23.1s	remaining: 34.5s
500:	learn: 0.3377252	total: 26.9s	remaining: 26.8s
600:	learn: 0.3377252	total: 32.6s	remaining: 21.7s
700:	learn: 0.3377252	total: 37s	remaining: 15.8s
800:	learn: 0.3377252	total: 42.6s	remaining: 10.6s
900:	learn: 0.3377252	total: 47.3s	remaining: 5.2s
999:	learn: 0.3377252	total: 51.5s	remaining: 0us
Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

      Normal     0.9325    1.0000    0.9651    103560
      Attack     0.0000    0.0000    0.0000      7496

    accuracy                         0.9325    111056
   macro avg     0.4663    0.5000    0.4825    111056
weighted avg     0.8696    0.9325    0.8999    111056

Confusion Matrix:
[[103560      0]
 [  7496      0]]
No of feature 61
Epoch 10/100, Error: -919940623020472.1250
Epoch 20/100, Error: -16101442659917486.0000
Epoch 30/100, Error: -83975499018375680.0000
Epoch 40/100, Error: -269381771752632096.0000
Epoch 50/100, Error: -663586898483613952.0000
Epoch 60/100, Error: -1384260944434877440.0000
Epoch 70/100, Error: -2575415889547164160.0000
Epoch 80/100, Error: -4407654697962359808.0000


KeyboardInterrupt: 

In [None]:
def print_plot(dictionary):
    keys = list(dictionary.keys())
    values = list(dictionary.values())
    plt.figure(figsize=(8, 6))
    plt.plot(keys, values, marker='o', linestyle='-', color='b')
    plt.xlabel('No of Features')
    plt.ylabel('F1-Score')
    plt.title('No of Features vs F1-Score')
    plt.grid(True)
    plt.show()

In [None]:
print_plot(SVM_f1)

In [None]:
print_plot(KNN_f1)

In [None]:
print_plot(RFC_f1)

In [None]:
print_plot(LGBM_f1)

In [None]:
print_plot(XGB_f1)

In [None]:
print_plot(CB_f1)