# K-fold cross validation

Implement a random k-fold cross validation algorithm from scratch.

Your algorithm should:
- load the iris dataset and split its columns into features and target
- split the dataset into k folds to perform cross-validation

You can use the code below to implement your algorithm, or implement it yourself from scratch.



In [3]:
# we will implement a k-fold cross validation from scratch
# we will use the iris dataset

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

# Fetch the iris dataset, then separate the feature matrix from the labels
iris = load_iris()
X, y = iris.data, iris.target

def k_fold_cross_validation(X, y, k, model, random_state=None):
    """Estimate a model's accuracy with random k-fold cross-validation.

    The sample indices are shuffled and partitioned into ``k`` folds whose
    sizes differ by at most one. Each fold serves once as the test set while
    the model is fitted on the union of the remaining folds.

    Parameters
    ----------
    X : array-like
        Features, one sample per entry along the first axis. Converted with
        ``np.asarray`` so lists and pandas objects are indexed by position.
    y : array-like
        Targets, one per sample; must have the same length as ``X``.
    k : int
        Number of folds, with ``2 <= k <= n_samples``.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. The same
        instance is re-fitted on every fold.
    random_state : None, int or numpy.random.Generator, optional
        Seed (or generator) for a reproducible shuffle. With ``None``
        (default) the global ``np.random`` state is used, as before.

    Returns
    -------
    numpy.float64
        Mean of the per-fold accuracies.

    Raises
    ------
    TypeError
        If ``k`` is not an integer.
    ValueError
        If ``k`` is outside ``[2, n_samples]`` or ``X`` and ``y`` disagree
        on the number of samples.
    """
    # Positional fancy indexing below requires real arrays.
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = X.shape[0]

    if y.shape[:1] != (n_samples,):
        raise ValueError(
            f"X and y must have the same number of samples "
            f"(got {n_samples} and {y.shape[0] if y.ndim else 'a scalar'})"
        )
    if isinstance(k, bool) or not isinstance(k, (int, np.integer)):
        raise TypeError(f"k must be an integer, got {type(k).__name__}")
    # k == 1 leaves no training data; k > n_samples leaves empty test folds.
    if not 2 <= k <= n_samples:
        raise ValueError(
            f"k must satisfy 2 <= k <= n_samples ({n_samples}), got {k}"
        )

    if random_state is None:
        # Keep drawing from the global RNG so np.random.seed() still applies.
        indices = np.arange(n_samples)
        np.random.shuffle(indices)
    else:
        indices = np.random.default_rng(random_state).permutation(n_samples)

    # array_split gives the first n_samples % k folds one extra sample.
    folds = np.array_split(indices, k)

    accuracies = []
    for i, test_indices in enumerate(folds):
        # Train on every fold except the i-th one.
        train_indices = np.concatenate(folds[:i] + folds[i + 1:])

        model.fit(X[train_indices], y[train_indices])

        # Accuracy is the fraction of held-out samples predicted correctly.
        y_pred = np.asarray(model.predict(X[test_indices]))
        accuracies.append(np.mean(y_pred == y[test_indices]))

    # Return the average accuracy over all folds
    return np.mean(accuracies)


In [4]:
#You can use the code below to test your function

#import the random forest model
from sklearn.ensemble import RandomForestClassifier

# Evaluate a default random forest with 5-fold cross validation
model = RandomForestClassifier()
k_fold_cross_validation(X, y, k=5, model=model)

0.96