# Logistic Regression Model

## Imports

In [1]:
# General
import os
import sys
import warnings

from joblib import dump

# Model and Metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report

# Custom (the parent directory must be on sys.path before importing from utils)
sys.path.append('../')
from utils.fit_dataset import fit_dataset
from utils.constant import FEATURES, LABELS, ATTACKS
# Warnings
# With no category or module argument, this installs an 'ignore' filter that
# matches every warning raised from this point on.
# NOTE(review): this would also hide scikit-learn warnings emitted while
# fitting (e.g. ConvergenceWarning) — confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

## Dataset


In [2]:
# Value handed to fit_dataset and reused later in the saved model's file name
n_files = 10

# fit_dataset hands back a (train, test) pair of frames
df_train, df_test = fit_dataset(n_files, ATTACKS)

# Training inputs and targets, selected by the project-level column constants
X_train = df_train[FEATURES]
y_train = df_train[LABELS]

# Report the row count of each split
print(f'Training Population: {len(df_train)}')
print(f'Testing Population: {len(df_test)}')

100%|██████████| 10/10 [00:08<00:00,  1.17it/s]
100%|██████████| 3/3 [00:01<00:00,  1.58it/s]


Training Population: 2366956
Testing Population: 711796


## Model

In [3]:
# Logistic regression estimator, constructed with scikit-learn's default settings
log_reg_model = LogisticRegression()

# Fit on the training split; the call stays the last expression of the cell
# so its return value is still what the cell displays
log_reg_model.fit(X=X_train, y=y_train)

In [4]:
# Save Model
# The file name records n_files, the value that was passed to fit_dataset.
name = f"../outputs/log_reg_model_{n_files}.joblib"

# Create the target directory if it is missing: dump() does not create parent
# directories, so a checkout without ../outputs would otherwise fail here,
# after training has already completed.
os.makedirs(os.path.dirname(name), exist_ok=True)

# dump() returns the list of file paths it wrote; keeping it as the last
# expression leaves that list visible as the cell output.
dump(log_reg_model, name)

['../outputs/log_reg_model_10.joblib']

## Evaluation

In [5]:
# Held-out features and labels, selected with the same column constants as training
X_test, y_test = df_test[FEATURES], df_test[LABELS]

# Predict
y_pred = log_reg_model.predict(X_test)

# Evaluate
# Every scalar metric is printed with '.4f' (four decimal places). The earlier
# '{:4f}' spec only set a minimum field width of 4 and kept the default
# six-digit precision, so recall/precision/F1 were formatted differently
# from accuracy.
# NOTE(review): recall_score, precision_score and f1_score use scikit-learn's
# default pos_label=1, so these three numbers describe label 1 only; the
# classification report printed last lists both labels separately.
print('Accuracy: {:.4f}'.format(accuracy_score(y_test, y_pred)))
print('Recall: {:.4f}'.format(recall_score(y_test, y_pred)))
print('Precision: {:.4f}'.format(precision_score(y_test, y_pred)))
print('F1: {:.4f}'.format(f1_score(y_test, y_pred)))
print('Classification Report: \n{}'.format(classification_report(y_test, y_pred)))

Accuracy: 0.9889
Recall: 0.995303
Precision: 0.993362
F1: 0.994332
Classification Report: 
              precision    recall  f1-score   support

           0       0.79      0.72      0.75     16683
           1       0.99      1.00      0.99    695113

    accuracy                           0.99    711796
   macro avg       0.89      0.86      0.87    711796
weighted avg       0.99      0.99      0.99    711796

