In [215]:
import pandas as pd
import sklearn
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

In [206]:
# Load the iris dataset and keep features and labels in separate objects:
# `df` holds only the feature columns, `y` holds the class labels.
# `load_iris` is imported explicitly; a bare `import sklearn` does not
# guarantee that the `sklearn.datasets` submodule is loaded.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
# Build the labels directly as a Series named 'target' instead of adding the
# column to `df` and dropping it again; `df` is never mutated after creation.
y = pd.Series(data.target, name='target')

### Hold-Out

In [207]:
# Hold-out evaluation: a single 70/30 train/test split with a fixed seed so
# the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df, y, test_size=0.3, random_state=42
)

# max_iter is raised to 1000 for the solver.
logistic_regression = LogisticRegression(max_iter=1000)
logistic_regression.fit(X_train, y_train)

test_prediction = logistic_regression.predict(X_test)

# Per-class precision / recall / F1 on the held-out 30%.
# `classification_report` is imported explicitly rather than reached through
# the `sklearn.metrics` attribute, which this notebook never imports itself.
print("Precision, Recall\n")
print(classification_report(y_test, test_prediction, digits=3))

Precision, Recall

              precision    recall  f1-score   support

           0      1.000     1.000     1.000        19
           1      1.000     1.000     1.000        13
           2      1.000     1.000     1.000        13

    accuracy                          1.000        45
   macro avg      1.000     1.000     1.000        45
weighted avg      1.000     1.000     1.000        45



### k-Fold

In [209]:
# Plain 5-fold split. With KFold's default shuffle=False the rows are cut into
# consecutive folds in their stored order.
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(df):
    # split() yields positional row numbers, so both the features and the
    # labels are selected with .iloc. Plain y[...] is a label-based lookup and
    # only matches positions while y keeps its default 0..n-1 index.
    X_train_fold, X_test_fold = df.iloc[train_index], df.iloc[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]

### Stratified Hold-Out

In [214]:
# Stratified hold-out: the same 70/30 split and seed as the plain hold-out,
# but `stratify=y` makes the split follow the class labels in `y`.
X_train, X_test, y_train, y_test = train_test_split(
    df, y, test_size=0.3, random_state=42, stratify=y
)

# max_iter is raised to 1000 for the solver.
logistic_regression = LogisticRegression(max_iter=1000)
logistic_regression.fit(X_train, y_train)

test_prediction = logistic_regression.predict(X_test)

# Per-class precision / recall / F1 on the held-out 30%.
# `classification_report` is imported explicitly rather than reached through
# the `sklearn.metrics` attribute, which this notebook never imports itself.
print("Precision, Recall\n")
print(classification_report(y_test, test_prediction, digits=3))

Precision, Recall

              precision    recall  f1-score   support

           0      1.000     1.000     1.000        15
           1      0.875     0.933     0.903        15
           2      0.929     0.867     0.897        15

    accuracy                          0.933        45
   macro avg      0.935     0.933     0.933        45
weighted avg      0.935     0.933     0.933        45



### Stratified k-Fold

In [216]:
# Stratified 5-fold split: the labels `y` are passed to split() so the folds
# are built with respect to the class labels.
skf = StratifiedKFold(n_splits=5)

for train_index, test_index in skf.split(df, y):
    # split() yields positional row numbers, so both the features and the
    # labels are selected with .iloc. Plain y[...] is a label-based lookup and
    # only matches positions while y keeps its default 0..n-1 index.
    X_train_fold, X_test_fold = df.iloc[train_index], df.iloc[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]