## Machine learning Model

* In this notebook we build a machine learning model that distinguishes SYN flood traffic from normal (benign) network communication.

In [16]:
#importing all the dependencies 
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

## loading our datasets 

* In this section we load our data and encode the labels: Benign (0) and SYN (1).

In [17]:
# Load the train / test / evaluation splits.
# NOTE(review): DATA_DIR is an absolute, machine-specific location. Point it at
# your own copy of the splits (ideally a path relative to the repository) before
# running this notebook on another machine.
DATA_DIR = "D:/coding project/Detection-of-SYN-Flood-Attacks-Using-Machine-Learning-and-Deep-Learning-Techniques-with-Feature-Base/Data/Splits"
train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")
eval_set = pd.read_csv(f"{DATA_DIR}/eval.csv")

# Encode labels: 'BENIGN' -> 0, 'Syn' -> 1 (keys must match the CSV values exactly)
label_map = {'BENIGN': 0, 'Syn': 1}
for split_name, df in [("train", train), ("test", test), ("eval", eval_set)]:
    encoded = df['Label'].map(label_map)
    # Series.map turns every value that is missing from label_map into NaN.
    # Fail loudly here instead of handing corrupted targets to the model.
    unmapped = df.loc[encoded.isna(), 'Label'].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Unexpected label value(s) in the {split_name} split: {list(unmapped)}; "
            f"expected only {list(label_map)}"
        )
    df['Label'] = encoded

# Separate features and labels
X_train, y_train = train.drop(columns=['Label']), train['Label']
X_test, y_test = test.drop(columns=['Label']), test['Label']
X_eval, y_eval = eval_set.drop(columns=['Label']), eval_set['Label']


## Model architecture

* In this section we define our model architecture, train the model, and evaluate it on the test and evaluation sets.

In [18]:

# Initialize the XGBoost classifier
# NOTE(review): XGBClassifier relies on scikit-learn being importable by xgboost;
# if it raises "sklearn needs to be installed", install scikit-learn and restart
# the kernel before re-running the notebook.
model = XGBClassifier(
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100,
    scale_pos_weight=1,  # Can tune this if data is imbalanced
    use_label_encoder=False,  # NOTE(review): deprecated in recent XGBoost releases - confirm against the installed version
    eval_metric='logloss',
    random_state=42,  # fixed seed so any randomised option (e.g. subsampling) stays reproducible
    verbosity=0
)


def report_metrics(header, fitted_model, features, labels):
    """Print classification metrics for one data split and return the predictions.

    Args:
        header: Line printed before the metrics (identifies the split).
        fitted_model: A trained classifier exposing ``predict`` and ``predict_proba``.
        features: Feature matrix of the split.
        labels: True binary labels (0 = benign, 1 = SYN) of the split.

    Returns:
        The hard class predictions produced by ``fitted_model.predict(features)``.
    """
    predictions = fitted_model.predict(features)
    # ROC AUC is a ranking metric: it must be scored with the predicted
    # probability of the positive class. Feeding it hard 0/1 predictions
    # collapses the ROC curve to a single operating point.
    positive_scores = fitted_model.predict_proba(features)[:, 1]

    print(header)
    print("Accuracy:", accuracy_score(labels, predictions))
    print("Precision:", precision_score(labels, predictions))
    print("Recall:", recall_score(labels, predictions))
    print("F1 Score:", f1_score(labels, predictions))
    print("ROC AUC:", roc_auc_score(labels, positive_scores))
    print("Confusion Matrix:\n", confusion_matrix(labels, predictions))
    return predictions


# Train the model
model.fit(X_train, y_train)

# Evaluate on test set
y_pred = report_metrics("Test Set Evaluation:", model, X_test, y_test)

# Evaluate on evaluation set
y_eval_pred = report_metrics("\nEvaluation Set Results:", model, X_eval, y_eval)


ImportError: sklearn needs to be installed in order to use this module