# Gradient Boosting Classifier

## 'Label' Feature

In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from scripts import preprocess as ref
from sklearn import metrics # is used to create classification results
from sklearn.ensemble import GradientBoostingClassifier


# Path to the input CSV (file name suggests the balanced UNSW-NB15 training
# set), relative to the notebook's working directory.
ORIGINAL_CSV = '../data/UNSW-NB15-BALANCED-TRAIN.csv'

# low_memory=False parses the file in a single pass so that column dtypes
# are inferred consistently instead of chunk by chunk.
origin = pd.read_csv(
    ORIGINAL_CSV,
    encoding='ISO-8859-1',
    low_memory=False,
)

# Project-specific cleaning lives in scripts/preprocess.py.
df = ref.preprocess_data(origin)

# Swap the address columns (presumably source/destination IPs) for integer
# codes: pd.factorize numbers the distinct values in order of first
# appearance, and element [0] of its result is the per-row code array.
for ip_column in ('srcip', 'dstip'):
    df[ip_column] = pd.factorize(df[ip_column])[0]

In [42]:
# Feature matrix: every column except the two prediction targets.
x = df.drop(columns=['attack_cat', 'Label'])
y_Label = df['Label']

# Hold out 30% of the rows as the test set. The split is shuffled, stratified
# on 'Label' so both partitions keep the same class proportions, and seeded
# so the partition is reproducible.
(x_Label_train, x_Label_test,
 y_Label_train, y_Label_test) = train_test_split(
    x,
    y_Label,
    test_size=0.3,
    shuffle=True,
    stratify=y_Label,
    random_state=42,
)

In [43]:
# Fewer, more aggressive boosting stages than the scikit-learn defaults
# (n_estimators=100, learning_rate=0.1); max_depth=3 matches the default.
# random_state is pinned so that Restart & Run All rebuilds the same model:
# the underlying trees permute the features at every split, so without a
# seed, equally good splits can be resolved differently from run to run.
classifier_Label = GradientBoostingClassifier(n_estimators=20,
                                              learning_rate=0.5,
                                              max_depth=3,
                                              random_state=42)
classifier_Label.fit(x_Label_train, y_Label_train)

In [44]:
# Predict the 'Label' class for every held-out row.
y_Label_pred = classifier_Label.predict(X=x_Label_test)

In [45]:
# Per-class precision / recall / F1 on the held-out 'Label' split.
# Keyword arguments make the (truth, prediction) order explicit.
print(
    metrics.classification_report(
        y_true=y_Label_test,
        y_pred=y_Label_pred,
    )
)

              precision    recall  f1-score   support

           0       1.00      0.99      0.99     67470
           1       0.99      1.00      0.99     67469

    accuracy                           0.99    134939
   macro avg       0.99      0.99      0.99    134939
weighted avg       0.99      0.99      0.99    134939



## 'attack_cat' Feature

In [46]:
y_attack_cat = df['attack_cat']

# Hold out 30% of the rows as the test set, reusing the feature matrix `x`.
# Stratifying on 'attack_cat' keeps each category's share the same in the
# train and test partitions.
# NOTE(review): the seed here is 32 while the 'Label' split uses 42 --
# confirm this difference is intentional.
(x_attack_cat_train, x_attack_cat_test,
 y_attack_cat_train, y_attack_cat_test) = train_test_split(
    x,
    y_attack_cat,
    test_size=0.3,
    shuffle=True,
    stratify=y_attack_cat,
    random_state=32,
)


In [50]:
# learning_rate=0.1 and max_depth=3 are the scikit-learn defaults, spelled
# out here for visibility; n_estimators is left at its default.
# random_state is pinned so that Restart & Run All rebuilds the same model:
# the underlying trees permute the features at every split, so without a
# seed, equally good splits can be resolved differently from run to run.
classifier_attack_cat = GradientBoostingClassifier(learning_rate=0.1,
                                                   max_depth=3,
                                                   random_state=42)

classifier_attack_cat.fit(x_attack_cat_train, y_attack_cat_train)

In [51]:
# Predict the 'attack_cat' class for every held-out row.
y_attack_cat_pred = classifier_attack_cat.predict(X=x_attack_cat_test)

In [52]:
# Per-class precision / recall / F1 on the held-out 'attack_cat' split.
# zero_division=0 reports 0 (instead of warning) for any metric whose
# denominator is zero, e.g. a class that is never predicted.
print(
    metrics.classification_report(
        y_true=y_attack_cat_test,
        y_pred=y_attack_cat_pred,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

           0       1.00      0.99      0.99     67470
           1       1.00      0.98      0.99     45251
           2       0.81      0.86      0.83      5090
           3       0.60      0.89      0.72      9348
           4       0.41      0.13      0.19      3425
           5       0.91      0.77      0.83      2953
           6       0.65      0.05      0.10       378
           7       0.66      0.08      0.14       558
           8       0.86      0.83      0.85       314
           9       0.15      0.90      0.25       117
          10       0.23      0.14      0.18        35

    accuracy                           0.94    134939
   macro avg       0.66      0.60      0.55    134939
weighted avg       0.94      0.94      0.93    134939

