In [2]:
import numpy as np
from sklearn.datasets import load_iris # Import the Iris dataset loader
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold # KFold cross-validation split generator
from sklearn.metrics import accuracy_score # Metric for evaluation

In [3]:
# --- Configuration ---
K_FOLDS = 5 # Number of folds for K-Fold Cross-Validation

In [4]:
# --- Load Dataset ---
print("Loading Iris dataset...")
iris = load_iris()
X, y = iris.data, iris.target # X are features, y are labels
N_TOTAL_SAMPLES = len(X)
N_FEATURES = X.shape[1]
N_CLASSES = len(np.unique(y))

print(f"Total samples in Iris dataset: {N_TOTAL_SAMPLES}")
print(f"Number of features: {N_FEATURES}")
print(f"Number of classes: {N_CLASSES}")

Loading Iris dataset...
Total samples in Iris dataset: 150
Number of features: 4
Number of classes: 3


In [5]:
# --- Initialize ML Model ---
# We use Logistic Regression, a simple model suitable for this classification task.
# solver='liblinear' is good for small datasets, max_iter increases robustness.
model = LogisticRegression(solver='liblinear', max_iter=1000)

In [6]:
# --- Perform K-Fold Cross-Validation ---
print(f"\nPerforming {K_FOLDS}-Fold Cross-Validation on the entire dataset...")


Performing 5-Fold Cross-Validation on the entire dataset...


In [7]:
# Initialize KFold cross-validator
# n_splits: how many folds to create (K)
# shuffle=True: randomly shuffles the data before splitting (recommended)
# random_state: seed for shuffling for reproducibility
kf = KFold(n_splits=K_FOLDS, shuffle=True, random_state=42)

In [8]:
fold_accuracies = [] # List to store the accuracy score from each fold

In [10]:
# The kf.split(X, y) method generates the indices for the train and validation sets for each fold
for fold, (train_index, val_index) in enumerate(kf.split(X, y)):
    print(f"\n--- Processing Fold {fold + 1}/{K_FOLDS} ---")

    # Split the data into training and validation sets for this specific fold using the generated indices
    print(train_index, val_index)
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Train the model using the training data for this fold
    model.fit(X_train, y_train)

    # Evaluate the trained model using the validation data for this fold
    y_pred = model.predict(X_val)

    # Calculate the accuracy for this fold
    accuracy = accuracy_score(y_val, y_pred)

    print(f"  Fold {fold + 1} Training samples: {len(X_train)}")
    print(f"  Fold {fold + 1} Validation samples: {len(X_val)}")
    print(f"  Fold {fold + 1} Accuracy: {accuracy:.4f}")

    # Store the accuracy
    fold_accuracies.append(accuracy)


--- Processing Fold 1/5 ---
[  0   1   2   3   4   5   6   7   8  10  11  13  14  15  16  17  20  21
  22  23  24  25  27  28  32  33  34  35  37  38  39  40  41  42  43  44
  46  47  48  49  50  51  52  53  54  57  58  59  60  61  62  63  65  66
  67  70  71  72  74  75  77  79  80  81  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 105 106 107 109 111
 112 113 114 115 116 117 119 120 121 122 123 124 125 126 129 130 133 134
 135 136 137 138 139 140 142 144 146 147 148 149] [  9  12  18  19  26  29  30  31  36  45  55  56  64  68  69  73  76  78
  82 104 108 110 118 127 128 131 132 141 143 145]
  Fold 1 Training samples: 120
  Fold 1 Validation samples: 30
  Fold 1 Accuracy: 1.0000

--- Processing Fold 2/5 ---
[  1   2   3   5   6   7   8   9  12  13  14  17  18  19  20  21  23  24
  25  26  29  30  31  33  34  35  36  37  38  39  41  43  45  46  47  48
  49  50  52  53  54  55  56  57  58  59  61  62  63  64  68  69  70  71
  72  73  74  76  77  7

In [1]:

# --- Report Results ---
# Calculate the average accuracy across all folds
avg_accuracy = np.mean(fold_accuracies)
print(f"\n--- Average {K_FOLDS}-Fold Cross-Validation Accuracy: {avg_accuracy:.4f} ---")
print("\nCross-validation finished.")

Loading Iris dataset...
Total samples in Iris dataset: 150
Number of features: 4
Number of classes: 3

Performing 5-Fold Cross-Validation on the entire dataset...

--- Processing Fold 1/5 ---
  Fold 1 Training samples: 120
  Fold 1 Validation samples: 30
  Fold 1 Accuracy: 1.0000

--- Processing Fold 2/5 ---
  Fold 2 Training samples: 120
  Fold 2 Validation samples: 30
  Fold 2 Accuracy: 0.9333

--- Processing Fold 3/5 ---
  Fold 3 Training samples: 120
  Fold 3 Validation samples: 30
  Fold 3 Accuracy: 0.9333

--- Processing Fold 4/5 ---
  Fold 4 Training samples: 120
  Fold 4 Validation samples: 30
  Fold 4 Accuracy: 0.9667

--- Processing Fold 5/5 ---
  Fold 5 Training samples: 120
  Fold 5 Validation samples: 30
  Fold 5 Accuracy: 0.9667

--- Average 5-Fold Cross-Validation Accuracy: 0.9600 ---

Cross-validation finished.
