# K-Fold Cross Validation Tutorial

A simple explanation of K-fold cross-validation, with a worked example using scikit-learn.

In [None]:

# Import libraries
import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC


## Load Dataset

In [None]:

# Load scikit-learn's bundled digits dataset and split it into the feature
# matrix (X) and the label vector (y) used by every cell below.
digits = load_digits()
X, y = digits.data, digits.target

# Sanity check: report the dimensions of both arrays.
for label, array in (("Data shape:", X), ("Target shape:", y)):
    print(label, array.shape)


## Normal Train-Test Split

In [None]:

# Hold out 30% of the samples for testing. Pinning random_state makes the split
# (and therefore the printed accuracy) identical on every Restart & Run All;
# change the seed to see how much a single-split score can move.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# One-vs-rest logistic regression. The `multi_class` argument of
# LogisticRegression is deprecated since scikit-learn 1.5, so the one-vs-rest
# scheme is requested explicitly through OneVsRestClassifier instead.
model = OneVsRestClassifier(LogisticRegression(solver='liblinear'))
model.fit(X_train, y_train)

print("Accuracy:", model.score(X_test, y_test))


## K-Fold Cross Validation

In [None]:

# Five folds, shuffled with a fixed seed so the fold assignment is reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Score one-vs-rest logistic regression on each of the five folds.
# OneVsRestClassifier replaces the `multi_class='ovr'` argument, which is
# deprecated since scikit-learn 1.5.
scores = cross_val_score(
    OneVsRestClassifier(LogisticRegression(solver='liblinear')),
    X, y, cv=kf
)

print("K-Fold Scores:", scores)
print("Average Accuracy:", scores.mean())


## Comparing Multiple Models with K-Fold

In [None]:

# Candidate classifiers to compare, keyed by the label printed in the report.
models = {
    # One-vs-rest logistic regression; OneVsRestClassifier replaces the
    # `multi_class='ovr'` argument, deprecated since scikit-learn 1.5.
    "Logistic Regression": OneVsRestClassifier(LogisticRegression(solver='liblinear')),
    # NOTE(review): gamma='auto' means 1 / n_features, which is sensitive to
    # feature scale; the digits pixels are not rescaled here, so consider
    # scaling the features or using gamma='scale' if the SVM score looks low.
    "SVM": SVC(gamma='auto'),
    # Fixed seed so the forest (and its score) is the same on every run.
    "Random Forest": RandomForestClassifier(n_estimators=40, random_state=42)
}

# Reuse the shuffled, seeded splitter `kf` from the K-Fold section so every
# model is scored on identical folds and the numbers are comparable with that
# section. (A bare `cv=5` would switch to unshuffled StratifiedKFold.)
# Loop names are distinct from `model` / `scores` so the results of the earlier
# cells are not overwritten.
for name, estimator in models.items():
    model_scores = cross_val_score(estimator, X, y, cv=kf)
    print(name, "Average Accuracy:", model_scores.mean())


## Conclusion
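K-Fold Cross Validation gives a more reliable model evaluation than a single train-test split, because every sample is used for testing exactly once and the reported accuracy is averaged over several splits instead of depending on one particular split.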