# Logistic Regression Model

## Imports

In [1]:
# Custom
import sys
# Extend the module search path with the parent directory so the project-local
# `utils` package (presumably located one level above this notebook) resolves.
sys.path.append('../')
from utils.constant import FEATURES, LABELS, ALL_ATTACKS
from utils.dataset_manager import fit_dataset, get_classes_weights

# General
import warnings
from joblib import dump

# Model and Metrics
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

# Warnings
# Blanket filter: every warning is suppressed for the rest of the session.
# NOTE(review): this would also hide warnings emitted by scikit-learn while
# fitting (e.g. convergence warnings) — consider narrowing the filter.
warnings.filterwarnings('ignore')

## Dataset


In [2]:
# Size argument handed to fit_dataset; also reused later in the saved model's
# file name. Presumably the number of source files to read — TODO confirm.
n_files = 20

# Build the train and test frames for the full ALL_ATTACKS selection.
df_train, df_test = fit_dataset(n_files, ALL_ATTACKS)

# Split the training frame into model inputs and targets.
X_train = df_train[FEATURES]
y_train = df_train[LABELS]

# Report how many rows ended up in each split.
train_size, test_size = len(df_train), len(df_test)
print('Training Population: {}'.format(train_size))
print('Testing Population: {}'.format(test_size))

100%|██████████| 20/20 [00:18<00:00,  1.07it/s]
100%|██████████| 6/6 [00:04<00:00,  1.32it/s]


Training Population: 4723822
Testing Population: 1648176


## Model

In [3]:
# Class weights computed from the training frame by the project helper
# (presumably a per-label weighting to offset class imbalance — verify
# against utils.dataset_manager).
class_weights = get_classes_weights(df_train)

# Build the classifier with those weights; all other hyper-parameters keep
# their scikit-learn defaults.
log_reg_model = LogisticRegression(class_weight=class_weights)

# Fit on the training split. Kept as the last expression so the notebook
# still echoes the fitted estimator.
log_reg_model.fit(X_train, y_train)

In [4]:
# Save Model
# The file name embeds n_files, so each dataset size gets its own artifact
# under ../outputs/.
name = f"../outputs/log_regression_{n_files}.joblib"
# dump is the cell's last expression, so the notebook echoes its return value
# (the list of file names written).
dump(log_reg_model, name)

['../outputs/log_regression_20.joblib']

## Evaluation

In [5]:
# Split the held-out frame into inputs and targets, mirroring the training split.
X_test = df_test[FEATURES]
y_test = df_test[LABELS]

# Predict a class label for every test row.
y_pred = log_reg_model.predict(X_test)

# Per-class precision / recall / F1 / support, rendered as text.
report = classification_report(y_test, y_pred)
print('Classification Report: \n{}'.format(report))

Classification Report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    142361
           1       1.00      0.98      0.99    144128
           2       0.65      0.93      0.77    143521
           3       0.72      0.91      0.81    191686
           4       0.65      0.79      0.71    159101
           5       1.00      1.00      1.00    254077
           6       0.77      0.63      0.69    126849
           7       0.87      0.98      0.92     10061
           8       0.93      0.98      0.96     10244
           9       0.98      0.98      0.98     16043
          10       0.14      0.47      0.22       844
          11       0.28      0.73      0.40      1050
          12       0.74      0.43      0.54    116827
          13       0.50      0.21      0.30     71688
          14       0.42      0.28      0.34     94194
          15       0.53      0.74      0.62      2474
          16       0.61      0.65      0.62     35144
   