In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the combined CICIDS2017 CSV into one DataFrame. low_memory=False tells
# pandas not to parse the file in internal chunks, which avoids mixed-type
# inference within a column.
df = pd.read_csv(
    filepath_or_buffer="../Data/CICIDS2017_combined.csv",
    low_memory=False,
)

# (rows, columns) before any cleaning is applied.
print("Original shape:", df.shape)

Original shape: (2830743, 79)


In [3]:
# Trim surrounding whitespace from the header names so that later lookups by
# exact column name (e.g. "Label") are not affected by padding.
df.columns = df.columns.str.strip()

# Turn +/-infinity into NaN so that a single dropna() pass removes every row
# holding either an infinite or a missing value.
df = df.replace(to_replace=[np.inf, -np.inf], value=np.nan)
df = df.dropna()

# (rows, columns) after the rows above were removed.
print("After cleaning shape:", df.shape)


After cleaning shape: (2827876, 79)


In [4]:
# Separate the target from the features: y is the "Label" column, X is every
# remaining column of df.
y = df.loc[:, "Label"]
X = df.drop(columns=["Label"])

In [5]:
from sklearn.preprocessing import LabelEncoder

# Learn the mapping from each distinct label value to an integer code, then
# apply it to y. The fitted encoder keeps the original label values in
# `classes_`, indexed by their integer code.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

print("Number of classes:", len(label_encoder.classes_))


Number of classes: 15


In [6]:
from sklearn.model_selection import train_test_split

# Split settings:
#   test_size=0.2      -> 20% of the rows are held out for testing
#   random_state=42    -> fixed seed so the shuffle is reproducible
#   stratify=y_encoded -> split in a stratified fashion using the encoded labels
split_options = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": y_encoded,
}
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Train shape: (2262300, 78)
Test shape: (565576, 78)


In [7]:
from sklearn.preprocessing import Normalizer

# Normalizer rescales each sample (row) independently to unit norm (L2 by
# default). It learns nothing from the data, so the values produced here do not
# depend on which split it was fitted on.
norm = Normalizer()

# Fit on the training split only and merely *transform* the held-out split.
# The previous code called fit_transform on X_test as well; that is harmless
# for a stateless transformer but is the same pattern that leaks test
# statistics for stateful ones, so the fit/transform discipline is kept here.
X_train_norm = norm.fit_transform(X_train)
X_test_norm = norm.transform(X_test)

In [8]:
from sklearn.preprocessing import StandardScaler

# StandardScaler learns a per-feature mean and standard deviation in fit() and
# applies (x - mean) / std in transform().
scaler = StandardScaler()

# Learn the statistics from the training split only.
X_train_scaled = scaler.fit_transform(X_train)

# Apply the *training* statistics to the test split. The previous code called
# fit_transform(X_test), which re-estimated mean/std from the test data: the two
# splits were then standardised with different parameters (so a model trained on
# X_train_scaled saw differently scaled inputs at evaluation time), and `scaler`
# itself ended up holding test-set statistics.
X_test_scaled = scaler.transform(X_test)

In [9]:
# Cast everything to 32-bit types: int32 for the encoded labels, float32 for
# every feature matrix (raw, standardised and normalised).

# Encoded labels.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Raw features.
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

# Normalizer output.
X_train_norm = X_train_norm.astype(np.float32)
X_test_norm = X_test_norm.astype(np.float32)

# StandardScaler output.
X_train_scaled = X_train_scaled.astype(np.float32)
X_test_scaled = X_test_scaled.astype(np.float32)

In [10]:
# Write every prepared split to ../Data as a .npy file. The (path, array) pairs
# are listed in the order they are written; an existing file at the same path
# is overwritten by np.save.
for out_path, out_array in (
    ("../Data/X_train_scaled.npy", X_train_scaled),
    ("../Data/X_test_scaled.npy", X_test_scaled),
    ("../Data/X_train_norm.npy", X_train_norm),
    ("../Data/X_test_norm.npy", X_test_norm),
    ("../Data/X_train.npy", X_train),
    ("../Data/X_test.npy", X_test),
    ("../Data/y_train.npy", y_train),
    ("../Data/y_test.npy", y_test),
):
    np.save(out_path, out_array)