### Cross-validation with all the variants

In [8]:
from sklearn.model_selection import KFold, StratifiedKFold, LeaveOneOut, RepeatedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Fix every source of randomness (data, model, splitters) so that
# "Restart Kernel -> Run All" prints the same numbers on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Sample data. The labels are drawn independently of the features, so there is
# no signal to learn: accuracies around chance level (~0.5) are expected below.
X = rng.random((100, 10))  # 100 samples, 10 features
y = rng.integers(2, size=100)  # Binary target variable (0 or 1)

# Model. Seeded so the forests fitted inside each fold are reproducible;
# cross_val_score fits a fresh clone of this estimator for every split.
model = RandomForestClassifier(random_state=SEED)

# Standard K-Fold Cross Validation
# (100 samples over 6 folds -> four folds of 17 samples and two of 16)
kf = KFold(n_splits=6, shuffle=True, random_state=SEED)
scores = cross_val_score(model, X, y, cv=kf)
print("Standard K-Fold Average Accuracy:", np.mean(scores))

# Stratified K-Fold Cross Validation
# (each fold keeps roughly the same class proportions as the full target)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
stratified_scores = cross_val_score(model, X, y, cv=skf)
print("Stratified K-Fold Average Accuracy:", np.mean(stratified_scores))

# Leave-One-Out Cross Validation
# (one fit per sample, so 100 fits here; each score is either 0.0 or 1.0)
loo = LeaveOneOut()
loo_scores = cross_val_score(model, X, y, cv=loo)
print("LOOCV Average Accuracy:", np.mean(loo_scores))

# Repeated K-Fold Cross Validation
# (5 folds repeated 10 times with different shuffles -> 50 scores)
rkf = RepeatedKFold(n_splits=5, n_repeats=10, random_state=SEED)
repeated_scores = cross_val_score(model, X, y, cv=rkf)
print("Repeated K-Fold Average Accuracy:", np.mean(repeated_scores))


Standard K-Fold Average Accuracy: 0.46936274509803927
Stratified K-Fold Average Accuracy: 0.53
LOOCV Average Accuracy: 0.48
Repeated K-Fold Average Accuracy: 0.49500000000000005


In [7]:
# Per-fold accuracies from the stratified run above (one value per split).
print("score for each and every fold : ", stratified_scores, sep=" ")
# The stratified splitter was configured with 5 folds, hence 5 accuracy values.
print("trained model with 5 fold thats why we got it 5 accuracy!", end="\n")

score for each and every fold :  [0.4  0.55 0.45 0.35 0.5 ]
trained model with 5 fold thats why we got it 5 accuracy!


<h1>For regression, you have to change the <code>scoring</code> parameter of <code>cross_val_score</code></h1>

In [13]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, make_scorer
import numpy as np

# Seed the data generator, the model and the splitter so the scores printed
# below are reproducible on a fresh kernel.
SEED = 42
rng = np.random.default_rng(SEED)

# Sample data. The target is drawn independently of the features, so the model
# has nothing to learn; this cell only demonstrates the scoring mechanics.
X = rng.random((100, 10))  # 100 samples, 10 features
y = rng.random(100)  # Continuous target variable in [0, 1)

# Model
model = RandomForestRegressor(random_state=SEED)

# Scorer for MSE. MSE is a loss (lower is better), so it is declared with
# greater_is_better=False: scikit-learn then sign-flips the value, which keeps
# the "higher score is better" convention intact if this scorer is ever reused
# for model selection.
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

# Standard K-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
# The scorer yields negated MSE; flip the sign back to report plain MSE.
scores = -cross_val_score(model, X, y, cv=kf, scoring=mse_scorer)

print("MSE Scores for each fold:", scores)
print("Average MSE:", np.mean(scores))


MSE Scores for each fold: [0.09515666 0.06409506 0.09118028 0.10002346 0.06524814]
Average MSE: 0.0831407205857377


## Thank you 😎