<a href="https://colab.research.google.com/github/Hacxmr/log-analysis/blob/main/NSS_KDD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install imbalanced-learn tensorflow




In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

# 1. Load dataset
df = pd.read_csv("/content/kdd_dataset.csv")
df.drop(columns=["Unnamed: 0", "difficulty"], inplace=True, errors='ignore')

# 2. Split features and labels
X = df.drop(columns=["class"])
y = df["class"]

# 3. One-hot encode categorical features
X_encoded = pd.get_dummies(X).astype('float32')

# 4. Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded)

# 5. Convert labels to integer class indices
label_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
y_class_indices = pd.Series(y).astype('category').cat.codes
y_classes = pd.Series(y).astype('category').cat.categories
y_encoded = label_encoder.fit_transform(y_class_indices.values.reshape(-1, 1)).astype('float32')

# 6. Train-test split using class indices
X_train, X_test, y_train_indices, y_test_indices = train_test_split(
    X_scaled, y_class_indices, test_size=0.2, random_state=42
)

# 7. Filter rare classes in train set
(unique_classes, counts) = np.unique(y_train_indices, return_counts=True)
valid_classes = unique_classes[counts > 3]
train_mask = np.isin(y_train_indices, valid_classes)
X_train = X_train[train_mask]
y_train_indices = y_train_indices[train_mask]

# 8. Also filter test set to match training classes
test_mask = np.isin(y_test_indices, valid_classes)
X_test = X_test[test_mask]
y_test_indices = y_test_indices[test_mask]

# 9. Re-encode filtered labels
label_encoder = OneHotEncoder(sparse_output=False)
y_train = label_encoder.fit_transform(y_train_indices.to_numpy().reshape(-1, 1)).astype('float32')
y_test = label_encoder.transform(y_test_indices.to_numpy().reshape(-1, 1)).astype('float32')


# 10. Apply SMOTE
smote = SMOTE(k_neighbors=2, random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# 11. Build model
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train_res.shape[1],)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y_train_res.shape[1], activation='softmax')
])

# 12. Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 13. Early stopping
es = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# 14. Train model
history = model.fit(
    X_train_res, y_train_res,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
    callbacks=[es],
    verbose=1
)

# 15. Evaluate on test set
loss, acc = model.evaluate(X_test, y_test, verbose=1)
print(f"\nTest Accuracy: {acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m13476/13476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 5ms/step - accuracy: 0.8953 - loss: 0.3373 - val_accuracy: 0.4126 - val_loss: 6.0080
Epoch 2/50
[1m13476/13476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 5ms/step - accuracy: 0.9703 - loss: 0.0806 - val_accuracy: 0.4622 - val_loss: 5.1988
Epoch 3/50
[1m13476/13476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 5ms/step - accuracy: 0.9751 - loss: 0.0684 - val_accuracy: 0.4300 - val_loss: 4.9472
Epoch 4/50
[1m13476/13476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 5ms/step - accuracy: 0.9777 - loss: 0.0624 - val_accuracy: 0.4795 - val_loss: 4.4794
Epoch 5/50
[1m13476/13476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 5ms/step - accuracy: 0.9792 - loss: 0.0582 - val_accuracy: 0.4801 - val_loss: 4.4944
Epoch 6/50
[1m13476/13476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 5ms/step - accuracy: 0.9804 - loss: 0.0551 - val_accuracy: 0.4022 - val_loss: 5.349