# Imports

In [1]:
# Standard library
import os
import sys

# Third-party
import pandas as pd
from joblib import Memory
from sklearn import set_config
from sklearn.preprocessing import StandardScaler

# Put the parent directory first on the import path so the local `src`
# package resolves when the notebook is run from its own sub-directory.
sys.path.insert(0, os.path.abspath(".."))

# Project helpers (must be imported after the path tweak above).
from src.model_evaluation_with_pca_and_balancing import (
    evaluate_model,
    get_balancing_methods,
    get_model_constructors,
    pca_train_and_evaluate_models,
    train_and_evaluate_models,
)

In [2]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

# Suppress Warnings

In [3]:
import warnings
from sklearn.exceptions import UndefinedMetricWarning

# Silence only the metric warning scikit-learn emits when a class receives no
# predicted samples (its precision / F-score is then ill-defined and reported
# as 0, as seen for class 1 in the reports below).
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# NOTE: stderr is intentionally NOT redirected to os.devnull. Doing so leaked
# an open file handle on every run and hid every message written to stderr
# (not just warnings) for the rest of the kernel session. If other warnings
# are too noisy, add further targeted `warnings.filterwarnings(...)` calls
# here, or wrap the noisy cell in `%%capture`.

# Memory Configuration

In [4]:
# Configure scikit-learn's working-memory budget and set up an on-disk joblib cache.
print('Configuring memory...')
# `working_memory` caps the size of temporary arrays used by chunked
# scikit-learn operations; the value is expressed in MiB (see set_config docs).
set_config(working_memory=12000)
# Cache rooted at ./cachedir (relative to the notebook's working directory).
# NOTE(review): `memory` is not referenced by any other cell visible here.
memory = Memory(location='cachedir', verbose=0)
# Start from an empty cache on every run; warn=False suppresses joblib's
# notice about the cache being wiped.
memory.clear(warn=False)

Configuring memory...


# Load Data

In [5]:
# Load the pre-split feature matrices and label vectors from ../datasets.
print('Loading data...')
x_train = pd.read_csv("../datasets/x_train_unb.csv")
x_test = pd.read_csv("../datasets/x_test_unb.csv")
# squeeze("columns") turns a single-column frame into a Series but, unlike a
# bare squeeze(), never collapses a one-row file all the way down to a scalar.
y_train = pd.read_csv("../datasets/y_train_unb.csv").squeeze("columns")
y_test = pd.read_csv("../datasets/y_test_unb.csv").squeeze("columns")

# Fail fast if features and labels are misaligned, instead of surfacing as an
# obscure error inside model fitting.
assert len(x_train) == len(y_train), "train features/labels row count mismatch"
assert len(x_test) == len(y_test), "test features/labels row count mismatch"

Loading data...


# Scaling 

In [8]:
# Standardise every feature to zero mean / unit variance.
# The scaler's statistics are learned from the training split only and then
# re-applied to the test split, so no test-set information leaks into training.
# NOTE(review): previously the scaler was created but never applied, so the
# "*_scaled" variables were just aliases of the raw frames. If the CSVs are
# already standardised upstream, confirm before keeping this step. This also
# assumes every column is numeric — verify against the dataset.
scaler = StandardScaler()
# Wrap the resulting arrays back into DataFrames so downstream code keeps
# receiving the same type, column labels and index as before.
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train), columns=x_train.columns, index=x_train.index
)
X_test_scaled = pd.DataFrame(
    scaler.transform(x_test), columns=x_test.columns, index=x_test.index
)

# Model Setup

In [9]:
# Build the collections of models and balancing strategies that the evaluation
# helpers below iterate over. Both helpers come from
# src.model_evaluation_with_pca_and_balancing.
# NOTE(review): y_train is passed in, presumably so model settings can depend
# on the label distribution — confirm against the helper's implementation.
model_constructors = get_model_constructors(y_train)
balancing_methods = get_balancing_methods()

# Evaluation Without PCA

In [10]:
# Evaluate the model / balancing-method configurations on the full feature set
# (no dimensionality reduction). For each configuration the helper prints a
# confusion matrix and a classification report, as shown in the output below.
train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test, model_constructors, balancing_methods)


=== Model: LogisticRegression, No balancing ===
Configuration: LogisticRegression, No balancing
[[424   0]
 [ 26   0]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       424
           1       0.00      0.00      0.00        26

    accuracy                           0.94       450
   macro avg       0.47      0.50      0.49       450
weighted avg       0.89      0.94      0.91       450

------------------------------------------------------------

=== Model: SVM, No balancing ===
Configuration: SVM, No balancing
[[424   0]
 [ 26   0]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       424
           1       0.00      0.00      0.00        26

    accuracy                           0.94       450
   macro avg       0.47      0.50      0.49       450
weighted avg       0.89      0.94      0.91       450

------------------------------------------------------------

=== Model: Ga

# Evaluation With PCA - Principal Component Analysis

In [11]:
# Repeat the evaluation with PCA applied, once per component count.
# range(5, 17) yields 5, 6, ..., 16 (the upper bound is exclusive); the output
# below starts with the 5-component run.
pca_train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test, model_constructors, balancing_methods, range(5, 17))


### PCA: 5 components ###

=== PCA=5, Model: LogisticRegression, No balancing ===
Configuration: PCA=5, LogisticRegression, No balancing
[[424   0]
 [ 26   0]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       424
           1       0.00      0.00      0.00        26

    accuracy                           0.94       450
   macro avg       0.47      0.50      0.49       450
weighted avg       0.89      0.94      0.91       450

------------------------------------------------------------

=== PCA=5, Model: SVM, No balancing ===
Configuration: PCA=5, SVM, No balancing
[[424   0]
 [ 26   0]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       424
           1       0.00      0.00      0.00        26

    accuracy                           0.94       450
   macro avg       0.47      0.50      0.49       450
weighted avg       0.89      0.94      0.91       450

--------------------