In [1]:
# Run this cell in Colab
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import (
    KFold, StratifiedKFold, LeaveOneOut, RepeatedKFold, cross_val_score
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, f1_score

# Iris dataset: features come from `.data`, class labels from `.target`.
iris = load_iris()
X = iris.data
y = iris.target

# Estimator handed to cross_val_score in the cells that follow;
# the solver's iteration cap is set explicitly to 200.
model = LogisticRegression(max_iter=200)


Project 1: Basic K-Fold Cross-Validation

In [2]:
# Five folds, shuffled before splitting; random_state=42 pins the shuffle.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# `scores` holds one accuracy value per fold.
scores = cross_val_score(estimator=model, X=X, y=y, scoring="accuracy", cv=kf)

print("K-Fold Cross-Validation (5 splits)")
print("Scores:", scores)
print("Mean Accuracy:", np.mean(scores))


K-Fold Cross-Validation (5 splits)
Scores: [1.         1.         0.93333333 0.96666667 0.96666667]
Mean Accuracy: 0.9733333333333334


Project 2: Stratified K-Fold (Preserves Class Proportions in Each Fold; Useful for Imbalanced Classes)

In [3]:
# Stratified variant of the 5-fold split, shuffled with a fixed seed (42).
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# `scores` holds one accuracy value per fold.
scores = cross_val_score(estimator=model, X=X, y=y, scoring="accuracy", cv=skf)

print("Stratified K-Fold (Preserves class distribution)")
print("Scores:", scores)
print("Mean Accuracy:", np.mean(scores))


Stratified K-Fold (Preserves class distribution)
Scores: [1.         0.96666667 0.93333333 1.         0.93333333]
Mean Accuracy: 0.9666666666666668


Project 3: Leave-One-Out (LOO) Cross-Validation

In [4]:
# Leave-one-out splitter; it takes no configuration here.
loo = LeaveOneOut()

# One accuracy entry per split produced by `loo`.
scores = cross_val_score(estimator=model, X=X, y=y, scoring="accuracy", cv=loo)

print("Leave-One-Out Cross-Validation")
# `scores` is 1-D, so its size equals the number of splits evaluated.
print("Number of folds:", scores.size)
print("Mean Accuracy:", np.mean(scores))


Leave-One-Out Cross-Validation
Number of folds: 150
Mean Accuracy: 0.9666666666666667


Project 4: Repeated K-Fold (For more stable estimates)

In [5]:
# 5-fold splitting repeated 3 times; random_state=42 fixes the randomisation.
rkf = RepeatedKFold(
    n_splits=5,
    n_repeats=3,
    random_state=42,
)

# One accuracy entry per (repeat, fold) pair.
scores = cross_val_score(estimator=model, X=X, y=y, scoring="accuracy", cv=rkf)

print("Repeated K-Fold (5 splits x 3 repeats)")
# `scores` is 1-D, so its size equals the number of fits that were scored.
print("Number of runs:", scores.size)
print("Mean Accuracy:", np.mean(scores))


Repeated K-Fold (5 splits x 3 repeats)
Number of runs: 15
Mean Accuracy: 0.9688888888888889


Project 5: K-Fold with Custom Scoring (F1-score)

In [6]:
# Iris is a multiclass problem, so F1 is macro-averaged across the classes.
f1_macro = make_scorer(f1_score, average="macro")

# NOTE(review): per the scikit-learn docs, an integer `cv` combined with a
# classifier is resolved to an unshuffled StratifiedKFold, not a plain KFold.
scores = cross_val_score(estimator=model, X=X, y=y, scoring=f1_macro, cv=5)

print("K-Fold with F1 Macro Score")
print("F1 Scores:", scores)
print("Mean F1:", np.mean(scores))


K-Fold with F1 Macro Score
F1 Scores: [0.96658312 1.         0.93265993 0.96658312 1.        ]
Mean F1: 0.973165236323131
