In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Synthetic binary-classification dataset: 1000 samples, 10 features
# (8 informative, 2 redundant, none repeated). The fixed seed keeps the
# generated data identical across runs.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    random_state=42,
)

# Hold out 25% of the samples as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

In [2]:
from sklearn.metrics import classification_report

# Baseline: a logistic regression with default settings, fitted on the
# training split and evaluated on the held-out test split.
model = LogisticRegression().fit(X_train, y_train)

y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
# print() rather than a bare expression: the report is a multi-line string.
print(report)

              precision    recall  f1-score   support

           0       0.73      0.65      0.69       130
           1       0.66      0.74      0.70       120

    accuracy                           0.70       250
   macro avg       0.70      0.70      0.70       250
weighted avg       0.70      0.70      0.70       250



In [3]:
from sklearn.model_selection import KFold

# 5-fold splitter reused by the cross-validation cells in this notebook.
# Shuffling with a fixed seed makes every call to kf.split() yield the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Manual cross-validation loop. Fold-local names are used on purpose so the
# hold-out split (X_train, X_test, y_train, y_test) and the fitted `model`
# from the earlier cells are not overwritten as a side effect of this cell.
fold_accuracies = []
for train_index, test_index in kf.split(X, y):
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

    # Fit a fresh model per fold so nothing carries over between folds.
    fold_model = LogisticRegression().fit(X_fold_train, y_fold_train)
    # For a classifier, .score() is the mean accuracy on the given data.
    fold_accuracies.append(fold_model.score(X_fold_test, y_fold_test))

fold_accuracies

In [5]:
from sklearn.model_selection import cross_val_score

# Accuracy of a default logistic regression on each fold produced by kf.
scores_logistic = cross_val_score(
    estimator=LogisticRegression(),
    X=X,
    y=y,
    scoring="accuracy",
    cv=kf,
)
# Average accuracy across the folds.
scores_logistic.mean()

array([0.675, 0.715, 0.72 , 0.645, 0.72 ])

In [11]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Accuracy of a decision tree on each fold produced by kf.
# The tree is seeded like every other stochastic step in this notebook:
# scikit-learn's tree builder permutes the features at each split, so an
# unseeded tree can give different scores from one run to the next.
scores_dt = cross_val_score(
    DecisionTreeClassifier(random_state=42), X, y, cv=kf, scoring="accuracy"
)
# Average accuracy across the folds.
np.average(scores_dt)

np.float64(0.796)

In [14]:
from sklearn.model_selection import cross_validate

# Metrics to compute on every fold. cross_validate returns a dict with one
# array per metric under "test_<metric>" and, because return_train_score=True,
# "train_<metric>", alongside "fit_time" and "score_time".
scoring = ["accuracy", "precision", "recall", "roc_auc"]
scores = cross_validate(
    # Seeded so the per-fold numbers are reproducible and comparable with
    # other decision-tree runs on the same folds.
    DecisionTreeClassifier(random_state=42),
    X,
    y,
    cv=kf,
    scoring=scoring,
    return_train_score=True,
)
scores

{'fit_time': array([0.00924516, 0.00800109, 0.00857234, 0.00737453, 0.00924778]),
 'score_time': array([0.00835609, 0.00921774, 0.00459409, 0.00838423, 0.00464344]),
 'test_accuracy': array([0.72 , 0.835, 0.77 , 0.825, 0.82 ]),
 'train_accuracy': array([1., 1., 1., 1., 1.]),
 'test_precision': array([0.72093023, 0.81651376, 0.84146341, 0.78504673, 0.86458333]),
 'train_precision': array([1., 1., 1., 1., 1.]),
 'test_recall': array([0.65957447, 0.87254902, 0.67647059, 0.875     , 0.78301887]),
 'train_recall': array([1., 1., 1., 1., 1.]),
 'test_roc_auc': array([0.71657969, 0.83423369, 0.77190876, 0.82692308, 0.8223605 ]),
 'train_roc_auc': array([1., 1., 1., 1., 1.])}