In [1]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    StratifiedKFold,
    cross_validate
)

warnings.filterwarnings('ignore')


#### 1.Load the dataset

In [5]:
X_train = np.load('artifacts/X_train.npz')['arr_0']
Y_train = np.load('artifacts/Y_train.npz')['arr_0']
X_test = np.load('artifacts/X_test.npz')['arr_0']
y_test = np.load('artifacts/y_test.npz')['arr_0']

#### 2. Configure K-fold CV

In [2]:
cv = StratifiedKFold(
    n_splits=6,
    random_state=42,
    shuffle=True
)

In [3]:
model_lr = LogisticRegression(
    random_state=42,   
    max_iter=1000     
)


In [6]:
cv_result = cross_validate(
    model_lr,
    X_train,
    Y_train,
    cv = cv,
    scoring='accuracy',
    return_train_score=True
)

In [7]:
cv_result

{'fit_time': array([0.01278734, 0.01193953, 0.01008987, 0.01029897, 0.009547  ,
        0.00997615]),
 'score_time': array([0.00151253, 0.00133419, 0.00071621, 0.00086451, 0.00073981,
        0.00061893]),
 'test_score': array([0.73863636, 0.71070076, 0.73188063, 0.7408811 , 0.72666982,
        0.73519659]),
 'train_score': array([0.7301061 , 0.73503221, 0.73003694, 0.72937388, 0.7338259 ,
        0.7294686 ])}

In [10]:
score_arr = ['accuracy', 'precision' , 'recall' , 'f1']
for score in score_arr:
    cv_result = cross_validate(
                        model_lr,
                        X_train,
                        Y_train,
                        cv = cv,
                        scoring='accuracy',
                        return_train_score=True
                        )
    test_score = cv_result['test_score']
    test_score_avg = np.mean(test_score)

    print(f"{score} : {test_score_avg}")

accuracy : 0.7306608765891984
precision : 0.7306608765891984
recall : 0.7306608765891984
f1 : 0.7306608765891984
