# Logistic Regression Model

## Imports

In [42]:
import os
import warnings
import pandas as pd
from tqdm import tqdm
from utils.constant import *
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

warnings.filterwarnings('ignore')

## Dataset


In [37]:
# File Paths
df_sets = [k for k in os.listdir(DATASET_DIRECTORY) if k.endswith('.csv')]
df_sets.sort()

# Split
training_sets = df_sets[:15]
test_sets = df_sets[15:20]

# Preprocessing Functions
scaler = StandardScaler()

### Dataset Processing

In [38]:
df = pd.DataFrame()
for train_set in tqdm(training_sets):

    # Load data into a single dataframe
    df_set = pd.read_csv(DATASET_DIRECTORY + train_set)
    df = df._append(df_set, ignore_index=True)

    # Fit scaler
    scaler.fit(df_set[FEATURES])

# Scale
df[FEATURES] = scaler.transform(df[FEATURES])

# Encode labels
df[LABELS] = df[LABELS].apply(lambda x: ATTACKS[x])


100%|██████████| 15/15 [00:14<00:00,  1.05it/s]


# Model

In [39]:
# Model
model = LogisticRegression()

# Train
model.fit(df[FEATURES], df[LABELS])

# Evaluation

In [43]:
df_test = pd.DataFrame()
for test_set in test_sets:
    
    # Load data into a single dataframe
    df_set = pd.read_csv(DATASET_DIRECTORY + test_set)
    df_test = df_test._append(df_set, ignore_index=True)

# Scale
df_test[FEATURES] = scaler.transform(df_test[FEATURES])

# Encode labels
df_test[LABELS] = df_test[LABELS].apply(lambda x: ATTACKS[x])

# Predict
y_pred = list(model.predict(df_test[FEATURES]))

# Evaluate
y_test = list(df_test[LABELS])
print('accuracy_score = ', accuracy_score(y_pred, y_test))
print('recall_score = ', recall_score(y_pred, y_test, average='macro'))
print('precision_score = ', precision_score(y_pred, y_test, average='macro'))
print('f1_score = ', f1_score(y_pred, y_test, average='macro'))

accuracy_score =  0.8015961749593228
recall_score =  0.6163413634201793
precision_score =  0.4831665610271103
f1_score =  0.48998963481494057
