In [12]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, LeaveOneOut, RepeatedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [13]:
# Load the Iris dataset and pull out the feature matrix (X) and class labels (y).
iris = load_iris()
X, y = iris.data, iris.target

# Preview the first five rows, labelled with the dataset's own feature names.
pd.DataFrame(data=X, columns=iris.feature_names).head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


## 1. Hold-Out Validation

In [14]:
# Hold out 20% of the samples for testing; the other 80% are used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the same split is produced on every run
)

# Fit a seeded random forest on the training portion only.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Compare predictions for the held-out samples against their true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Hold-Out Validation Accuracy: {accuracy:.3f}")

Hold-Out Validation Accuracy: 1.000


## 2. K-Fold Cross Validation

In [15]:
# Five folds, shuffled before splitting; the seed keeps the fold assignment reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# One accuracy value is produced per fold.
model = RandomForestClassifier(random_state=42)
scores = cross_val_score(estimator=model, X=X, y=y, scoring='accuracy', cv=kf)

print(f"K-Fold Cross Validation Scores: {scores}")
print(f"Average Accuracy: {scores.mean():.3f}")

K-Fold Cross Validation Scores: [1.         0.96666667 0.93333333 0.93333333 0.96666667]
Average Accuracy: 0.960


## 3. Stratified K-Fold Cross Validation

In [16]:
# Five shuffled folds built by StratifiedKFold (splits take the labels in y into account).
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Score a seeded random forest on every fold using accuracy.
model = RandomForestClassifier(random_state=42)
scores = cross_val_score(
    model,
    X,
    y,
    cv=skf,
    scoring='accuracy',
)

mean_accuracy = np.mean(scores)
print(f"Stratified K-Fold Scores: {scores}")
print(f"Average Accuracy: {mean_accuracy:.3f}")

Stratified K-Fold Scores: [0.96666667 0.96666667 0.93333333 0.96666667 0.9       ]
Average Accuracy: 0.947


## 4. Leave-One-Out Cross Validation (LOOCV)

In [17]:
%%time

loo = LeaveOneOut()
model = RandomForestClassifier(random_state=42)

# cross_val_score automatically handles the looping
scores = cross_val_score(model, X, y, cv=loo, scoring='accuracy')

print(f"Total iterations: {len(scores)}")
print(f"LOOCV Average Accuracy: {np.mean(scores):.3f}")

Total iterations: 150
LOOCV Average Accuracy: 0.953
CPU times: total: 28.6 s
Wall time: 29 s


## 5. Repeated K-Fold Cross Validation

In [18]:
%%time

rkf = RepeatedKFold(n_splits=5, n_repeats=3, random_state=42)

model = RandomForestClassifier(random_state=42)

scores = cross_val_score(model, X, y, cv=rkf, scoring='accuracy')

print(f"Repeated K-Fold Average Accuracy: {np.mean(scores):.3f}")

Repeated K-Fold Average Accuracy: 0.956
CPU times: total: 2.77 s
Wall time: 2.84 s
