In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y= make_classification(
    n_features= 10,
    n_samples= 1000,
    n_informative= 8,
    n_redundant= 2,
    n_repeated=0,
    n_classes= 2,
    random_state= 42
)

X_train, X_test, y_train, y_test= train_test_split(X, y, test_size= 0.25, random_state= 42)

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

model= LogisticRegression()
model.fit(X_train, y_train)

y_pred= model.predict(X_test)
report= classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.73      0.65      0.69       130
           1       0.66      0.74      0.70       120

    accuracy                           0.70       250
   macro avg       0.70      0.70      0.70       250
weighted avg       0.70      0.70      0.70       250



In [3]:
from sklearn.model_selection import KFold

kf= KFold(n_splits= 5, shuffle= True, random_state= 42)

for train_idx, test_idx in kf.split([10, 20, 30, 40, 50]):
    print(train_idx, test_idx)


[0 2 3 4] [1]
[0 1 2 3] [4]
[0 1 3 4] [2]
[1 2 3 4] [0]
[0 1 2 4] [3]


In [6]:
for train_idx, test_idx in kf.split(X, y):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test= y[train_idx], y[test_idx]

model.fit(X_train, y_train)
print(model.score(X_test, y_test))

0.72


<h3>Logistic Regression</h3>

In [14]:
from sklearn.model_selection import cross_val_score
import numpy as np

score_lr= cross_val_score(LogisticRegression(), X, y, cv= kf)
np.average(score_lr)

np.float64(0.6950000000000001)

<h3>Decision Tree</h3>

In [15]:
from sklearn.tree import DecisionTreeClassifier

score_dt= cross_val_score(DecisionTreeClassifier(), X, y, cv= kf)
np.average(score_dt)

np.float64(0.776)

<h3>Random Forest Classifier</h3>

In [16]:
from sklearn.ensemble import RandomForestClassifier

score_rf= cross_val_score(RandomForestClassifier(), X, y, cv= kf)
np.average(score_rf)

np.float64(0.8880000000000001)

In [20]:
score_rf= cross_val_score(RandomForestClassifier(), X, y, cv= kf, scoring= "accuracy")
np.average(score_rf)

np.float64(0.893)

In [21]:
from sklearn.model_selection import cross_validate

cross_validate(DecisionTreeClassifier(), X, y, cv= kf, scoring= ["accuracy", "roc_auc"])

{'fit_time': array([0.01017237, 0.00974083, 0.01757836, 0.02070594, 0.02025938]),
 'score_time': array([0.00316191, 0.00253582, 0.00570035, 0.00581646, 0.00526404]),
 'test_accuracy': array([0.72 , 0.81 , 0.78 , 0.845, 0.81 ]),
 'test_roc_auc': array([0.71778402, 0.80872349, 0.78151261, 0.84655449, 0.81232437])}