<a href="https://colab.research.google.com/github/SHAHMACP/Machine-Learning-Basics-and-Terminologies/blob/main/Cross_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, StratifiedKFold, LeaveOneOut, ShuffleSplit, LeavePOut, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Load dataset
data = load_iris()
X, y = data.data, data.target

# Standardize features *inside* the estimator rather than on the full matrix.
# Fitting the scaler on all of X before splitting would let statistics from
# the held-out samples leak into preprocessing; as a pipeline step the scaler
# is re-fit on the training portion of every split instead.
scaler = StandardScaler()

# Initialize model (scaler + classifier evaluated as a single estimator)
model = make_pipeline(
    scaler,
    RandomForestClassifier(n_estimators=100, random_state=42),
)

In [3]:
# Baseline for comparison: one 80/20 hold-out split, no cross-validation.
holdout = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = holdout

# Fit on the training part only, then score on the unseen 20%.
model.fit(X_train, y_train)
test_score = model.score(X_test, y_test)

message = f"Test Accuracy without Cross-Validation: {test_score:.4f}"
print(message)

Test Accuracy without Cross-Validation: 1.0000


In [4]:
# Define different cross-validation techniques.
# Every splitter that involves randomness is seeded with the same value so the
# whole comparison is reproducible from a fresh kernel.
cv_methods = {
    "K-Fold (5)": KFold(n_splits=5, shuffle=True, random_state=42),
    "Stratified K-Fold (5)": StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    # Deterministic: one split per sample.
    "Leave-One-Out": LeaveOneOut(),
    # Deterministic: one split per pair of samples, so the number of fits
    # grows quadratically with the dataset size -- by far the slowest entry.
    "Leave-P-Out (2)": LeavePOut(p=2),
    # 50% train + 30% test: the remaining 20% of samples is unused per split.
    "Shuffle_split": ShuffleSplit(test_size=0.3, train_size=0.5, n_splits=10, random_state=42),
}

In [5]:
# Perform cross-validation and print results.
# For each strategy, cross_val_score fits a fresh clone of `model` on every
# split and returns one accuracy per split; only the mean is tabulated.
results = {}
for name, cv in cv_methods.items():
    # n_jobs=-1 spreads the independent fits over all CPU cores. The scores
    # are unaffected because the classifier is seeded, but strategies with
    # many splits (Leave-P-Out in particular) finish much sooner.
    scores = cross_val_score(model, X, y, cv=cv, scoring='accuracy', n_jobs=-1)
    results[name] = {"Mean Accuracy": np.mean(scores)}

# One row per cross-validation strategy.
df_results = pd.DataFrame(results).T
print(df_results)


                       Mean Accuracy
K-Fold (5)                  0.960000
Stratified K-Fold (5)       0.946667
Leave-One-Out               0.953333
Leave-P-Out (2)             0.958166
Shuffle_split               0.942222
