<a href="https://colab.research.google.com/github/Nandu-130/major-p/blob/main/NIDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive.
# NOTE(review): none of the visible cells read from /content/drive (the dataset is
# fetched with wget into /content) — confirm the mount is still required.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Colab cell 1 - install / imports
!pip install --quiet torch torchvision torchaudio scikit-learn pandas numpy matplotlib

import os
import random
import math
from typing import Tuple, List, Dict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

# Pick the compute device once: CUDA when available, otherwise CPU.
# Later cells read this module-level `device` to place models and batches.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


Device: cuda


In [None]:
# Colab cell 2 - Utilities: MAD outlier removal, preprocessing functions
# ---------------------------------------------------------------------

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, LabelEncoder

def mad_filter(df, numeric_cols, multiplier=10.0):
    """
    Drop rows whose value in any listed column lies too far from that column's median.

    For each column the spread estimate is 1.4826 * MAD (median absolute deviation);
    a row survives when |value - median| <= multiplier * spread.

    Columns are processed one after another, so the median and MAD of a column are
    computed on the rows that survived all previously processed columns.
    Columns whose spread estimate is exactly 0 are left unfiltered.

    Args:
        df: input DataFrame; it is not modified.
        numeric_cols: iterable of column names to filter on, in processing order.
        multiplier: how many spread units a value may deviate from the median.

    Returns:
        A new DataFrame containing only the surviving rows (original index labels kept).
    """
    kept = df.copy()
    for name in numeric_cols:
        center = kept[name].median()
        abs_dev = (kept[name] - center).abs()
        spread = 1.4826 * abs_dev.median()
        # A zero spread would reject every row that differs from the median at all,
        # so such columns are skipped.
        if spread != 0:
            kept = kept[abs_dev <= multiplier * spread]
    return kept

def preprocess_df(df, categorical_cols, numeric_cols, label_col, mad_multiplier=10.0):
    """
    Perform preprocessing steps:
      1. Drop rows containing NaN and reset the index
      2. MAD outlier filtering on the numeric columns
      3. One-hot encode categorical features
      4. Min–Max scale numeric features
      5. Label-encode target column

    Args:
        df: raw DataFrame; it is not modified (a new frame is built and returned).
        categorical_cols: list of column names to one-hot encode (may be empty).
        numeric_cols: list of column names to MAD-filter and scale to [0, 1].
        label_col: name of the target column to integer-encode.
        mad_multiplier: threshold multiplier forwarded to ``mad_filter``
            (defaults to 10.0, the value previously hard-coded here).

    Returns:
        (df, scaler, le): the processed DataFrame, the fitted MinMaxScaler and the
        fitted LabelEncoder. The fitted OneHotEncoder is not returned.

    Note:
        Every transformer is fitted on all rows passed in. If the caller splits
        into train/test afterwards, the test rows have influenced the fitted
        statistics.
    """
    df = df.dropna().reset_index(drop=True)

    # --- Outlier removal ---
    # .copy() detaches the result from the boolean-mask selections made inside
    # mad_filter, so the column assignments below operate on an independent frame
    # even when no one-hot step rebuilds it.
    df = mad_filter(df, numeric_cols, multiplier=mad_multiplier).copy()

    # --- One-hot encode categorical features ---
    if categorical_cols:
        ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        cat_arr = ohe.fit_transform(df[categorical_cols])
        cat_cols = ohe.get_feature_names_out(categorical_cols)
        # Reuse df's (non-contiguous after filtering) index so concat aligns row-for-row
        df_cat = pd.DataFrame(cat_arr, columns=cat_cols, index=df.index)
        df = pd.concat([df.drop(columns=categorical_cols), df_cat], axis=1)

    # --- Min–Max scaling for numeric features ---
    scaler = MinMaxScaler()
    df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

    # --- Label encode target column ---
    le = LabelEncoder()
    df[label_col] = le.fit_transform(df[label_col])

    return df, scaler, le


In [None]:
# ✅ Colab Cell 3 - Load NSL-KDD dataset
import pandas as pd
import numpy as np
import os

# Download dataset if not present
if not os.path.exists("/content/KDDTrain+.txt"):
    !wget -q https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain+.txt
    !wget -q https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTest+.txt

# Define column names
# The raw files are read with these names (see read_csv(names=cols) below):
# the feature columns, followed by the 'label' column and a trailing 'difficulty' column.
cols = [
    'duration','protocol_type','service','flag','src_bytes','dst_bytes','land','wrong_fragment',
    'urgent','hot','num_failed_logins','logged_in','num_compromised','root_shell',
    'su_attempted','num_root','num_file_creations','num_shells','num_access_files',
    'num_outbound_cmds','is_host_login','is_guest_login','count','srv_count',
    'serror_rate','srv_serror_rate','rerror_rate','srv_rerror_rate','same_srv_rate',
    'diff_srv_rate','srv_diff_host_rate','dst_host_count','dst_host_srv_count',
    'dst_host_same_srv_rate','dst_host_diff_srv_rate','dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate','dst_host_serror_rate','dst_host_srv_serror_rate',
    'dst_host_rerror_rate','dst_host_srv_rerror_rate','label','difficulty'
]

# Read both downloaded splits using the shared column names
train_df = pd.read_csv("/content/KDDTrain+.txt", names=cols)
test_df = pd.read_csv("/content/KDDTest+.txt", names=cols)

# Stack the two splits into one frame with a fresh 0..n-1 index, then discard the
# 'difficulty' column (errors='ignore' keeps this a no-op if it is already absent)
df = pd.concat([train_df, test_df], ignore_index=True).drop(columns=['difficulty'], errors='ignore')

# ---- Label grouping (multiclass example) ----
# Raw attack names grouped into four attack families.
# 'worm' is included under DoS: it was previously absent from every list, so its
# rows fell through to the 'Other' bucket.
dos = ['back','land','neptune','pod','smurf','teardrop','mailbomb','apache2','processtable','udpstorm','worm']
probe = ['satan','ipsweep','nmap','portsweep','mscan','saint']
r2l = ['guess_passwd','ftp_write','imap','phf','multihop','warezmaster','warezclient','spy','xlock','xsnoop',
        'snmpguess','snmpgetattack','httptunnel','sendmail','named']
u2r = ['buffer_overflow','loadmodule','rootkit','perl','sqlattack','xterm','ps']

# Flat raw-name -> family lookup, built once so each row costs a single dict access.
# setdefault keeps the first family that claims a name (DoS, Probe, R2L, U2R, then Normal),
# the same precedence as checking the lists in that order.
_LABEL_TO_CATEGORY = {}
for _family, _names in (('DoS', dos), ('Probe', probe), ('R2L', r2l), ('U2R', u2r), ('Normal', ['normal'])):
    for _name in _names:
        _LABEL_TO_CATEGORY.setdefault(_name, _family)

def map_label(x):
    """
    Map a raw label string to its coarse category.

    Args:
        x: raw label value (e.g. 'neptune', 'normal').

    Returns:
        One of 'DoS', 'Probe', 'R2L', 'U2R', 'Normal'; any label not listed
        above maps to 'Other'.
    """
    return _LABEL_TO_CATEGORY.get(x, 'Other')

# Replace each raw label with its coarse category, overwriting the 'label' column.
df['label'] = df['label'].apply(map_label)

# Summary of the merged frame and the per-category row counts after grouping.
print("✅ NSL-KDD dataset loaded successfully!")
print("Shape:", df.shape)
print("Label counts:\n", df['label'].value_counts().to_string())


✅ NSL-KDD dataset loaded successfully!
Shape: (148517, 42)
Label counts:
 label
Normal    77054
DoS       53385
Probe     14077
R2L        3880
U2R         119
Other         2


In [None]:
# Colab cell 4 - Prepare train/test and split by class (we will train a GAN per class to generate minority samples)
LABEL_COL = 'label'
# Identify categorical/numeric columns based on original dataframe dtypes
# We need to make sure these are correctly identified before preprocessing
# NOTE(review): detection relies on string columns having dtype `object`;
# confirm this holds for the pandas version in use.
categorical_cols = [c for c in df.columns if df[c].dtype == object and c != LABEL_COL]
# Everything that is neither categorical nor the label is treated as numeric
# (and is therefore MAD-filtered and min-max scaled inside preprocess_df).
numeric_cols = [c for c in df.columns if c not in categorical_cols + [LABEL_COL]]

# Explicitly convert categorical columns to string type before preprocessing
# (this rewrites the columns of the global `df` in place)
for col in categorical_cols:
    df[col] = df[col].astype(str)

# Now call preprocess_df with the dataframe where categorical columns are strings
# NOTE(review): preprocessing (outlier filter, encoders, scaler) is fitted on the
# full frame here, before the train/test split below — the held-out rows therefore
# influence the fitted statistics.
df_proc, scaler, labelenc = preprocess_df(df, categorical_cols, numeric_cols, LABEL_COL)
print("Processed shape:", df_proc.shape)
# Feature matrix (all columns except the label) as float32, and the encoded label vector
X = df_proc.drop(columns=[LABEL_COL]).values.astype(np.float32)
y = df_proc[LABEL_COL].values
classes, counts = np.unique(y, return_counts=True)
print("Classes (encoded):", dict(zip(classes, counts)))

# Split train/test (stratify to preserve class ratios)
# 70/30 split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
print("Train/test:", X_train.shape, X_test.shape)

Processed shape: (104428, 119)
Classes (encoded): {np.int64(0): np.int64(38047), np.int64(1): np.int64(56093), np.int64(2): np.int64(7174), np.int64(3): np.int64(3051), np.int64(4): np.int64(63)}
Train/test: (73099, 118) (31329, 118)


In [None]:
# Colab cell 5 - PyTorch Dataset wrapper
class NumpyDataset(Dataset):
    """
    Dataset view over NumPy arrays.

    Features are converted to a float32 tensor once at construction; labels, when
    given, to an int64 tensor. Indexing yields ``(features, label)`` when labels
    were supplied and just ``features`` otherwise.
    """

    def __init__(self, X, y=None):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y).long() if y is not None else None

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        if self.y is not None:
            return features, self.y[idx]
        return features

# Shuffled mini-batch loader over the un-augmented training split;
# drop_last=True discards the final batch when it is smaller than batch_size.
# NOTE(review): no later visible cell iterates `train_loader` — confirm it is still needed.
batch_size = 256
train_loader = DataLoader(NumpyDataset(X_train, y_train), batch_size=batch_size, shuffle=True, drop_last=True)


In [None]:
# Colab cell 6 - WGAN-GP model definitions (MLP generator/discriminator)
# We'll use an MLP generator and discriminator suitable for tabular flows.
# Latent dim and hidden sizes chosen following the paper (latent 50, hidden around 80).

# Shared layer sizes. The model classes below capture these as default argument
# values when their `class` statements run, so after changing a value here the
# class-definition cells must be re-run as well.
latent_dim = 50
hidden_dim = 80
# Width of one feature row of the training matrix
data_dim = X_train.shape[1]

class Generator(nn.Module):
    """
    MLP generator mapping a noise vector of width ``z_dim`` to a row of width ``out_dim``.

    Layout: two (Linear -> BatchNorm1d -> ReLU) stages of width ``hidden``, then a
    Linear projection to ``out_dim`` followed by Sigmoid, so every output lies in
    (0, 1) — matching features that are min-max scaled to [0, 1].
    """

    def __init__(self, z_dim=latent_dim, out_dim=data_dim, hidden=hidden_dim):
        super().__init__()
        layers = []
        in_features = z_dim
        for _ in range(2):
            layers += [nn.Linear(in_features, hidden), nn.BatchNorm1d(hidden), nn.ReLU()]
            in_features = hidden
        layers += [nn.Linear(hidden, out_dim), nn.Sigmoid()]
        self.net = nn.Sequential(*layers)

    def forward(self, z):
        """Return generated rows for a batch of noise vectors ``z``."""
        return self.net(z)

class Discriminator(nn.Module):
    """
    MLP critic scoring rows of width ``in_dim`` with one unbounded scalar each.

    Layout: two (Linear -> ReLU) stages of width ``hidden`` and a final Linear to a
    single output; there is no output activation and no normalisation layer.
    """

    def __init__(self, in_dim=data_dim, hidden=hidden_dim):
        super().__init__()
        widths = [in_dim, hidden, hidden]
        layers = []
        for w_in, w_out in zip(widths[:-1], widths[1:]):
            layers.extend([nn.Linear(w_in, w_out), nn.ReLU()])
        layers.append(nn.Linear(hidden, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return a flat 1-D tensor holding one score per input row."""
        scores = self.net(x)
        return scores.view(-1)


In [None]:
# Colab cell 7 - WGAN-GP training loop (per-class)
# Implements gradient penalty term. We'll train a generator per class (as the paper did for BEGAN), but using WGAN-GP.
# The paper suggests training a separate generator per class and generating synthetic data per class.

def gradient_penalty(D, real, fake, device):
    """
    WGAN-GP gradient penalty: mean of (||grad_x D(x_hat)||_2 - 1)^2 over the batch,
    where x_hat is a random per-sample interpolation between ``real`` and ``fake``.

    Args:
        D: callable critic returning one score per sample.
        real: batch of real samples, shape (batch, ...); any number of feature dims.
        fake: batch of generated samples with the same shape as ``real``.
        device: device on which the interpolation coefficients are created.

    Returns:
        Scalar tensor holding the penalty. It is built with ``create_graph=True``
        so it can be back-propagated through as part of the critic loss.
    """
    batch = real.size(0)
    # One mixing coefficient per sample, shaped (batch, 1, ..., 1) so it broadcasts
    # over however many feature dimensions `real` has (for 2-D input this is (batch, 1)).
    alpha = torch.rand(batch, *([1] * (real.dim() - 1)), device=device)
    interpolated = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interpolated = D(interpolated)
    # grad_outputs of ones gives each sample's own input-gradient of its score
    grads = torch.autograd.grad(outputs=d_interpolated, inputs=interpolated,
                                grad_outputs=torch.ones_like(d_interpolated),
                                create_graph=True, retain_graph=True)[0]
    # Flatten all feature dims so the L2 norm is taken per sample
    grads = grads.reshape(batch, -1)
    gp = ((grads.norm(2, dim=1) - 1) ** 2).mean()
    return gp

def train_wgangp_for_class(X_class, n_epochs=200, batch_size=256, lr=1e-4, n_critic=5, lambda_gp=10.0):
    """
    Train one WGAN-GP generator/critic pair on the rows of a single class.

    For every real mini-batch the critic is updated ``n_critic`` times (each time
    against freshly sampled fakes, with the gradient penalty weighted by
    ``lambda_gp``), then the generator is updated once.

    Args:
        X_class: float NumPy array of shape (n_rows, data_dim) with one class's rows.
        n_epochs: number of passes over ``X_class``.
        batch_size: requested mini-batch size; clamped to ``n_rows`` so that a small
            class still produces at least one batch per epoch.
        lr: Adam learning rate used for both networks.
        n_critic: critic updates per generator update (must be >= 1).
        lambda_gp: weight of the gradient-penalty term in the critic loss.

    Returns:
        (G, D): the trained Generator and Discriminator, both on ``device``.

    Raises:
        ValueError: if ``X_class`` has fewer than 2 rows or ``n_critic`` < 1.
    """
    n_rows = len(X_class)
    if n_rows < 2:
        raise ValueError(f"Need at least 2 rows to train a GAN, got {n_rows}")
    if n_critic < 1:
        raise ValueError(f"n_critic must be >= 1, got {n_critic}")
    # The loader drops incomplete batches; a batch size above n_rows would therefore
    # yield zero batches per epoch (nothing trained, and the progress print below
    # would reference losses that were never assigned).
    batch_size = min(batch_size, n_rows)

    G = Generator().to(device)
    D = Discriminator().to(device)
    opt_G = optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.9))
    opt_D = optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.9))

    # drop_last=True keeps every batch at exactly batch_size rows
    loader = DataLoader(NumpyDataset(X_class), batch_size=batch_size, shuffle=True, drop_last=True)

    for epoch in range(n_epochs):
        for real_batch in loader:
            real = real_batch.to(device)
            # Size the noise from the real batch so real and fake always match,
            # which the interpolation inside gradient_penalty requires.
            n_real = real.size(0)

            # train D more steps
            for _ in range(n_critic):
                z = torch.randn(n_real, latent_dim, device=device)
                # The critic step needs no gradient through G, so skip building its graph
                with torch.no_grad():
                    fake = G(z)
                D_real = D(real).mean()
                D_fake = D(fake).mean()
                gp = gradient_penalty(D, real, fake, device)
                d_loss = D_fake - D_real + lambda_gp * gp
                opt_D.zero_grad(); d_loss.backward(); opt_D.step()

            # train G
            z = torch.randn(n_real, latent_dim, device=device)
            fake = G(z)
            g_loss = -D(fake).mean()
            opt_G.zero_grad(); g_loss.backward(); opt_G.step()

        # optional: print progress every some epochs (values are from the epoch's last batch)
        if (epoch+1) % 50 == 0 or epoch == 0:
            print(f"[Epoch {epoch+1}/{n_epochs}] D_loss {d_loss.item():.4f} G_loss {g_loss.item():.4f} gp {gp.item():.4f}")

    return G, D

# We'll build a generator per class and store them
# Maps encoded class id -> trained Generator for that class
generators = {}
# NOTE: `counts` is rebound here to the training-split counts (it held the
# full-dataset counts after the split cell).
unique_train_classes, counts = np.unique(y_train, return_counts=True)
print("Train class counts:", dict(zip(unique_train_classes, counts)))
for cls in unique_train_classes:
    # pick data rows belonging to this class
    Xc = X_train[y_train == cls]
    print(f"Training WGAN-GP for class {cls} (n={len(Xc)}) ...")
    # if class too small, lower batch or increase epochs. For demo keep epochs small.
    # Batch size is a quarter of the class size, clamped to the range [32, 256].
    Gc, Dc = train_wgangp_for_class(Xc.astype(np.float32), n_epochs=150, batch_size=min(256, max(32, len(Xc)//4)))
    # Only the generator is kept; the critic Dc is not stored.
    generators[cls] = Gc


Train class counts: {np.int64(0): np.int64(26632), np.int64(1): np.int64(39265), np.int64(2): np.int64(5022), np.int64(3): np.int64(2136), np.int64(4): np.int64(44)}
Training WGAN-GP for class 0 (n=26632) ...


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[Epoch 1/150] D_loss -4.9156 G_loss 5.1020 gp 0.0490
[Epoch 50/150] D_loss -0.3038 G_loss -0.0956 gp 0.0012
[Epoch 100/150] D_loss -0.2432 G_loss -0.0383 gp 0.0011
[Epoch 150/150] D_loss -0.2124 G_loss -0.0396 gp 0.0008
Training WGAN-GP for class 1 (n=39265) ...
[Epoch 1/150] D_loss -4.8116 G_loss 4.8528 gp 0.0469
[Epoch 50/150] D_loss -0.1165 G_loss -0.1455 gp 0.0009
[Epoch 100/150] D_loss -0.0808 G_loss -0.6735 gp 0.0004
[Epoch 150/150] D_loss -0.0740 G_loss -0.7483 gp 0.0006
Training WGAN-GP for class 2 (n=5022) ...
[Epoch 1/150] D_loss 4.4121 G_loss 0.1952 gp 0.4785
[Epoch 50/150] D_loss -0.8179 G_loss -0.0020 gp 0.0031
[Epoch 100/150] D_loss -0.4098 G_loss -0.3744 gp 0.0016
[Epoch 150/150] D_loss -0.3611 G_loss -0.2990 gp 0.0026
Training WGAN-GP for class 3 (n=2136) ...
[Epoch 1/150] D_loss 6.4271 G_loss 0.1525 gp 0.6562
[Epoch 50/150] D_loss -2.8346 G_loss 2.4067 gp 0.0192
[Epoch 100/150] D_loss -1.1187 G_loss 0.5414 gp 0.0041
[Epoch 150/150] D_loss -0.5957 G_loss 0.1203 gp 0.003

In [None]:
# Colab cell 8 - Generate synthetic samples per class and augment training set
# Decide how many synthetic samples per class to create. For minority classes, we generate more.
def generate_synthetic_for_class(G, n_samples):
    """
    Sample ``n_samples`` synthetic rows from a trained generator.

    The generator is switched to eval mode (and left in it), standard-normal noise
    of width ``latent_dim`` is drawn on ``device``, and generation runs without
    gradient tracking.

    Args:
        G: trained generator module.
        n_samples: number of rows to generate.

    Returns:
        NumPy array with ``n_samples`` rows of generator output.
    """
    G.eval()
    with torch.no_grad():
        noise = torch.randn(n_samples, latent_dim, device=device)
        samples = G(noise)
    return samples.cpu().numpy()

# Example policy: bring each class up to the size of the largest class (simple balance strategy)
# Real training rows per class, and the size every class is topped up to
train_counts = {c: int((y_train==c).sum()) for c in unique_train_classes}
max_count = max(train_counts.values())
aug_X_list = []
aug_y_list = []
for cls in unique_train_classes:
    # Shortfall of this class relative to the largest one; the largest class needs none
    need = max_count - train_counts[cls]
    if need <= 0:
        continue
    print(f"Generating {need} synthetic samples for class {cls}")
    Gc = generators[cls]
    synth = generate_synthetic_for_class(Gc, need)
    aug_X_list.append(synth)
    # Every synthetic row carries the label of the class whose generator produced it
    aug_y_list.append(np.full(len(synth), cls, dtype=int))

if aug_X_list:
    # Synthetic rows are appended after the real rows; only the training split is
    # augmented (X_test / y_test are not touched here)
    X_synth = np.vstack(aug_X_list)
    y_synth = np.concatenate(aug_y_list)
    X_train_aug = np.vstack([X_train, X_synth])
    y_train_aug = np.concatenate([y_train, y_synth])
else:
    # Nothing to add: keep independent copies so later code can treat *_aug uniformly
    X_train_aug = X_train.copy()
    y_train_aug = y_train.copy()

print("Augmented train shape:", X_train_aug.shape)


Generating 12633 synthetic samples for class 0
Generating 34243 synthetic samples for class 2
Generating 37129 synthetic samples for class 3
Generating 39221 synthetic samples for class 4
Augmented train shape: (196325, 118)


In [None]:
# Colab cell 9 - Autoencoder training (symmetric autoencoder similar to discriminator/BEGAN's AE)
# The paper used AE as feature extractor (same architecture as discriminator) with hidden 80 and latent 50.

class Autoencoder(nn.Module):
    """
    Symmetric MLP autoencoder.

    ``encoder``: input_dim -> hidden -> latent, with ReLU after each Linear.
    ``decoder``: latent -> hidden -> input_dim, with ReLU then a final Sigmoid, so
    reconstructions lie in (0, 1).
    """

    def __init__(self, input_dim=data_dim, hidden=hidden_dim, latent=latent_dim):
        super().__init__()
        enc_layers = [nn.Linear(input_dim, hidden), nn.ReLU(),
                      nn.Linear(hidden, latent), nn.ReLU()]
        self.encoder = nn.Sequential(*enc_layers)
        dec_layers = [nn.Linear(latent, hidden), nn.ReLU(),
                      nn.Linear(hidden, input_dim), nn.Sigmoid()]
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

# Build the autoencoder and its optimiser; reconstruction quality is measured with MSE
ae = Autoencoder().to(device)
ae_opt = optim.Adam(ae.parameters(), lr=1e-3)
ae_loss_fn = nn.MSELoss()

# Unlabelled loader over the augmented training features (real + synthetic rows)
ae_loader = DataLoader(NumpyDataset(X_train_aug), batch_size=256, shuffle=True)
# train AE
for epoch in range(200):  # paper used up to 300 epochs; early stopping condition could be added
    epoch_loss = 0.0
    for xb in ae_loader:
        xb = xb.to(device)
        xr = ae(xb)
        loss = ae_loss_fn(xr, xb)
        ae_opt.zero_grad(); loss.backward(); ae_opt.step()
        # Weight the batch-mean loss by batch size so the epoch value is a per-row mean
        epoch_loss += loss.item() * xb.size(0)
    epoch_loss /= len(ae_loader.dataset)
    if (epoch+1) % 50 == 0 or epoch == 0:
        print(f"AE Epoch {epoch+1} Loss {epoch_loss:.6f}")
# Extract encoder (to be frozen for classifier)
# `encoder` is the same module object as ae.encoder (not a copy); disabling
# requires_grad keeps its weights out of any optimiser that filters on that flag.
encoder = ae.encoder
for p in encoder.parameters():
    p.requires_grad = False
# As the cell's last expression, this also displays the encoder's repr
encoder.eval()


AE Epoch 1 Loss 0.014734
AE Epoch 50 Loss 0.000423
AE Epoch 100 Loss 0.000402
AE Epoch 150 Loss 0.000389
AE Epoch 200 Loss 0.000385


Sequential(
  (0): Linear(in_features=118, out_features=80, bias=True)
  (1): ReLU()
  (2): Linear(in_features=80, out_features=50, bias=True)
  (3): ReLU()
)

In [None]:
# Colab cell 10 - Build classifiers (DNN, 1D-CNN, LSTM) that use encoder as front-end (DNNAE / CNNAE)
# DNNAE: encoder output -> dense classifier
class DNNAE(nn.Module):
    """
    Dense classifier stacked on a pre-trained encoder.

    The encoder is always run without gradient tracking; only the dense head
    (latent_dim -> 32 -> 16 -> n_classes, ReLU between layers) receives gradients.
    The forward pass returns raw class logits.
    """

    def __init__(self, encoder, latent_dim=latent_dim, n_classes=len(unique_train_classes)):
        super().__init__()
        self.encoder = encoder
        head_layers = [
            nn.Linear(latent_dim, 32), nn.ReLU(),
            nn.Linear(32, 16), nn.ReLU(),
            nn.Linear(16, n_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for a batch of feature rows."""
        with torch.no_grad():
            latent = self.encoder(x)
        return self.classifier(latent)

# CNNAE: 1D-CNN head on top of the encoder. The encoder output is reshaped to
# (batch, channels=1, length) so the convolutions slide along the latent vector.
class CNNAE(nn.Module):
    """
    1-D convolutional classifier stacked on a pre-trained encoder.

    The encoder is run without gradient tracking. Its output gets a singleton
    channel axis and passes through two Conv1d stages; adaptive average pooling
    reduces each of the 64 channels to one value, and a small dense head produces
    raw class logits.

    ``seq_len`` is accepted for interface compatibility but is not used.
    """

    def __init__(self, encoder, seq_len=data_dim, n_classes=len(unique_train_classes)):
        super().__init__()
        self.encoder = encoder
        conv_layers = [
            nn.Conv1d(1, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(32),
            nn.Conv1d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        ]
        self.conv = nn.Sequential(*conv_layers)
        fc_layers = [nn.Linear(64, 16), nn.ReLU(), nn.Linear(16, n_classes)]
        self.fc = nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return class logits for a batch of feature rows."""
        # frozen encoder pass
        with torch.no_grad():
            latent = self.encoder(x)
        # (batch, latent) -> (batch, 1, latent) for Conv1d
        feature_maps = self.conv(latent.unsqueeze(1))
        # flatten the pooled (batch, 64, 1) maps to (batch, 64)
        pooled = feature_maps.view(latent.size(0), -1)
        return self.fc(pooled)

# For LSTM, paper omitted encoder; we build an LSTM classifier on raw features shaped as sequences
class LSTMClassifier(nn.Module):
    """
    LSTM classifier that reads each feature row as a sequence of scalars.

    A row of F features becomes a length-F sequence with one value per step
    (``input_size=1``). The LSTM output at the final step feeds a dense head
    (hidden_size -> 32 -> n_classes) that returns raw class logits.

    ``input_dim`` is accepted for interface compatibility but is not used; the
    sequence length is taken from the input at call time.
    """

    def __init__(self, input_dim=data_dim, hidden_size=64, num_layers=2, n_classes=len(unique_train_classes)):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        head_layers = [nn.Linear(hidden_size, 32), nn.ReLU(), nn.Linear(32, n_classes)]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for a batch of feature rows."""
        # (batch, features) -> (batch, seq_len=features, 1)
        steps = x.unsqueeze(-1)
        hidden_seq, _ = self.lstm(steps)
        # keep only the output of the last time step
        return self.fc(hidden_seq[:, -1, :])


In [None]:
# Colab cell 11 - Train classifier function + evaluate
def train_classifier(model, X_tr, y_tr, X_val, y_val, epochs=100, lr=1e-3):
    """
    Train ``model`` with cross-entropy loss, then print a classification report on
    the validation data.

    Args:
        model: nn.Module returning class logits of shape (batch, n_classes).
        X_tr: training features, NumPy float array (n_rows, n_features).
        y_tr: training labels, NumPy integer array (n_rows,).
        X_val: validation features, NumPy float array.
        y_val: validation labels, NumPy integer array.
        epochs: number of passes over the training data.
        lr: Adam learning rate.

    Returns:
        The trained model (on ``device``, left in eval mode).
    """
    model = model.to(device)
    # Only parameters that still require gradients are optimised, so a frozen
    # encoder inside the model is left untouched.
    opt = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    ds = DataLoader(NumpyDataset(X_tr, y_tr), batch_size=256, shuffle=True)
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for xb, yb in ds:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            loss = loss_fn(logits, yb)
            opt.zero_grad(); loss.backward(); opt.step()
            # weight by batch size so total_loss ends up as a per-row mean
            total_loss += loss.item() * xb.size(0)
        total_loss /= len(ds.dataset)
        if (epoch+1) % 50 == 0 or epoch == 0:
            print(f"Epoch {epoch+1}/{epochs} loss {total_loss:.4f}")

    # eval
    # Predict in mini-batches: pushing the whole validation matrix through the model
    # in a single forward pass makes activation memory grow with the number of rows
    # and can exhaust GPU memory.
    model.eval()
    val_loader = DataLoader(NumpyDataset(X_val), batch_size=256, shuffle=False)
    pred_chunks = []
    with torch.no_grad():
        for xb in val_loader:
            pred_chunks.append(model(xb.to(device)).argmax(dim=1).cpu().numpy())
    preds = np.concatenate(pred_chunks) if pred_chunks else np.empty(0, dtype=np.int64)
    print(classification_report(y_val, preds, zero_division=0))
    return model

# Prepare validation/test sets (we'll use X_test / y_test processed earlier)


In [None]:
# Colab cell 12 - Train DNNAE, CNNAE, LSTM (epochs = 100)
# Release Python garbage and PyTorch's cached CUDA blocks before the training runs
import torch, gc
gc.collect()
torch.cuda.empty_cache()

# Optional: helps reduce CUDA fragmentation
# NOTE(review): earlier cells have already run models on the GPU by this point;
# this allocator setting may only be honoured if it is set before the first CUDA
# allocation of the process — confirm it has any effect here.
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# All three classifiers are trained on the augmented training set and reported on
# the untouched test split. DNNAE and CNNAE receive the same frozen `encoder` object.
print("Training DNNAE...")
model_dnnae = DNNAE(encoder)
model_dnnae = train_classifier(
    model_dnnae,
    X_train_aug.astype(np.float32), y_train_aug,
    X_test.astype(np.float32), y_test,
    epochs=100, lr=1e-3
)

print("\nTraining CNNAE...")
model_cnnae = CNNAE(encoder)
model_cnnae = train_classifier(
    model_cnnae,
    X_train_aug.astype(np.float32), y_train_aug,
    X_test.astype(np.float32), y_test,
    epochs=100, lr=1e-3
)

print("\nTraining LSTM (naive)...")
# If you still hit OOM, uncomment next line to move LSTM to CPU:
# device = torch.device("cpu")

# The LSTM takes the raw feature rows directly (no encoder front-end)
model_lstm = LSTMClassifier()
model_lstm = train_classifier(
    model_lstm,
    X_train_aug.astype(np.float32), y_train_aug,
    X_test.astype(np.float32), y_test,
    epochs=100, lr=1e-3
)


Training DNNAE...
Epoch 1/100 loss 0.1924
Epoch 50/100 loss 0.0091
Epoch 100/100 loss 0.0065
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11415
           1       0.99      1.00      0.99     16828
           2       0.99      0.98      0.98      2152
           3       0.95      0.90      0.92       915
           4       0.57      0.21      0.31        19

    accuracy                           0.99     31329
   macro avg       0.90      0.82      0.84     31329
weighted avg       0.99      0.99      0.99     31329


Training CNNAE...
Epoch 1/100 loss 0.2197
Epoch 50/100 loss 0.0071
Epoch 100/100 loss 0.0058
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11415
           1       0.99      1.00      0.99     16828
           2       0.98      0.98      0.98      2152
           3       0.98      0.87      0.92       915
           4       0.60      0.16      0.25        19

OutOfMemoryError: CUDA out of memory. Tried to allocate 18.66 GiB. GPU 0 has a total capacity of 14.74 GiB of which 13.47 GiB is free. Process 5625 has 1.26 GiB memory in use. Of the allocated memory 1010.89 MiB is allocated by PyTorch, and 145.11 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Colab cell 13 - Final evaluation summarised (accuracy)
def evaluate_model(model, X, y):
    """
    Print accuracy and a per-class classification report for ``model`` on (X, y).

    Predictions are produced in mini-batches of 256 rows without gradient tracking,
    so memory use does not grow with the size of ``X``.

    Args:
        model: nn.Module returning class logits; it is switched to eval mode.
        X: NumPy float feature array (n_rows, n_features).
        y: NumPy integer label array (n_rows,).

    Returns:
        None; results are printed.
    """
    model.eval()
    loader = DataLoader(NumpyDataset(X, y), batch_size=256)
    pred_chunks, label_chunks = [], []
    with torch.no_grad():
        for features, targets in loader:
            features = features.to(device)
            targets = targets.to(device)
            batch_preds = model(features).argmax(dim=1)
            pred_chunks.append(batch_preds.cpu().numpy())
            label_chunks.append(targets.cpu().numpy())
    y_pred = np.concatenate(pred_chunks)
    y_true = np.concatenate(label_chunks)

    acc = accuracy_score(y_true, y_pred)
    print("Accuracy:", acc)
    print(classification_report(y_true, y_pred, zero_division=0))

# Report each trained classifier on the held-out test split.
# This cell needs model_dnnae, model_cnnae and model_lstm to exist, i.e. the
# training cell must have completed for all three models.
print("DNNAE on test:")
evaluate_model(model_dnnae, X_test.astype(np.float32), y_test)
print("CNNAE on test:")
evaluate_model(model_cnnae, X_test.astype(np.float32), y_test)
print("LSTM on test:")
evaluate_model(model_lstm, X_test.astype(np.float32), y_test)

DNNAE on test:
Accuracy: 0.9918924957706917
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11415
           1       0.99      0.99      0.99     16828
           2       0.97      0.99      0.98      2152
           3       0.91      0.94      0.93       915
           4       0.62      0.26      0.37        19

    accuracy                           0.99     31329
   macro avg       0.90      0.84      0.85     31329
weighted avg       0.99      0.99      0.99     31329

CNNAE on test:
Accuracy: 0.9921478502346069
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11415
           1       0.99      1.00      0.99     16828
           2       0.98      0.98      0.98      2152
           3       0.97      0.89      0.93       915
           4       0.35      0.37      0.36        19

    accuracy                           0.99     31329
   macro avg       0.86      0.85      0.85