In [3]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

# Load the diabetes dataset
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Create a linear regression model
model = LinearRegression()

# Create a KFold object with 5 folds
# without shuffling by default
# Each fold is then used once as a validation while 
# The k - 1 remaining folds form the training set.
kfold = KFold(n_splits=5)

# Perform cross-validation
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Fit the model on the training data
    model.fit(X_train, y_train)
    
    # Evaluate the model on the testing data
    score = model.score(X_test, y_test)
    
    # Print the score for each fold
    print("Fold Score:", score)

Fold Score: 0.4295564286585779
Fold Score: 0.5225982811135659
Fold Score: 0.4826783998252704
Fold Score: 0.4265082749941945
Fold Score: 0.550249225965861


In [5]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression

# Load the diabetes dataset
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Create a logistic regression model
model = LogisticRegression()

# Create a StratifiedKFold object with 5 folds
# Stratified cross-validation is particularly useful when dealing with imbalanced datasets, 
# where the class distribution is uneven. 
# It ensures that each fold has a proportional representation of different classes, 
# helping to produce more reliable and representative evaluation results.

stratified_kfold = StratifiedKFold(n_splits=5,random_state=None, shuffle=False)

# Perform cross-validation
for train_index, test_index in stratified_kfold.split(X, y):
    # Split the data into training and testing sets
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Fit the model on the training data
    model.fit(X_train, y_train)
    
    # Evaluate the model on the testing data
    score = model.score(X_test, y_test)
    
    # Print the score for each fold
    print("StratifiedKFold Score:", score)
    




StratifiedKFold Score: 0.011235955056179775
StratifiedKFold Score: 0.011235955056179775
StratifiedKFold Score: 0.011363636363636364
StratifiedKFold Score: 0.011363636363636364
StratifiedKFold Score: 0.011363636363636364


In [7]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import LinearRegression

# Load the diabetes dataset
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
groups = diabetes.data[:, 0]  # Use the first feature as groups

# Create a linear regression model
model = LinearRegression()

# Create a GroupKFold object with 5 groups
group_kfold = GroupKFold(n_splits=5)

# Perform cross-validation
for train_index, test_index in group_kfold.split(X, y, groups):
    # Split the data into training and testing sets
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit the model on the training data
    model.fit(X_train, y_train)
    
    # Evaluate the model on the testing data
    score = model.score(X_test, y_test)
    
    # Print the score for each fold
    print("Fold Score:", score)


Fold Score: 0.42819426229187185
Fold Score: 0.4912257033901125
Fold Score: 0.46395603957976983
Fold Score: 0.5467154438789377
Fold Score: 0.5091372300916409
