In [1]:
import numpy as np
from sklearn.datasets import load_wine

# Fetch the Wine dataset and separate the feature matrix from the class labels
data = load_wine()
X, y = data.data, data.target

# Shuffle row positions rather than the rows themselves: indexing X and y with
# the same positions later keeps every sample paired with its own label
np.random.seed(42)  # fixed seed so the shuffle is repeatable
indices = np.random.permutation(X.shape[0])

# Cut the shuffled positions into K folds by hand
K = 5
folds = np.array_split(indices, K)

# Show which sample positions ended up in each fold
for fold_number, fold in enumerate(folds, start=1):
    print(f"Fold {fold_number}: {fold}")


Fold 1: [ 19  45 140  30  67  16 119 174 109 141  24 150  41 118  15 111 113  82
   9 114  18  66  60 169 171 164 117  65  90  55  29 128 145  31  12  42]
Fold 2: [158 137  98 159  38 108  85  68 143   2 100 122 154  51  76  56  26 153
 138 104  78  36  93  22 146  97  69 167  11   6  27 144   4  32  95 170]
Fold 3: [ 75  10 147 123   0 142 126  64  44  96  28  40 127  25  23 132  81  79
  84  39 101  86 120 173 162  47  94 135 166  61  73  33 125 133 139  62]
Fold 4: [176 112 115 105  53   5 160   3 165  49  35  80  77  34  46   7 124  43
  70 136 110  91  83 156 161  89   8  13  59 152 131  17  72 155 134]
Fold 5: [148 163  63  54 107  50 175  58  48  88  21  57 172 129  37 157 168   1
  52 149 130 151 103  99 116  87  74 121 177  20  71 106  14  92 102]


In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Manual K-fold cross-validation of a logistic-regression classifier.
#
# Standardization is fitted inside the loop, on the training folds only, and
# then applied to the held-out fold. Fitting the scaler on the full dataset
# before splitting would let the test fold's mean/variance influence the
# preprocessing (data leakage) and make the reported accuracy optimistic.
# X itself is never overwritten, so this cell can be re-run safely.
accuracies = []

# Perform steps for each fold
for i in range(K):
    print(f"\n=== Fold {i+1} ===")

    # 1. Use fold i as test, rest as train
    test_idx = folds[i]
    train_idx = np.concatenate([folds[j] for j in range(K) if j != i])

    y_train, y_test = y[train_idx], y[test_idx]

    # 2. Learn the scaling statistics from the training rows only, then reuse
    #    those same statistics to transform the held-out rows
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[test_idx])

    # 3. Train Logistic Regression
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # 4. Predict on test set
    y_pred = model.predict(X_test)

    # 5. Calculate and store accuracy
    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    print(f"Accuracy: {acc:.4f}")



=== Fold 1 ===
Accuracy: 1.0000

=== Fold 2 ===
Accuracy: 0.9722

=== Fold 3 ===
Accuracy: 1.0000

=== Fold 4 ===
Accuracy: 0.9714

=== Fold 5 ===
Accuracy: 1.0000


In [4]:
# Report the per-fold scores gathered by the cross-validation loop,
# followed by their mean

print("\n=== Summary ===")
for fold_number, fold_accuracy in enumerate(accuracies, start=1):
    print(f"Fold {fold_number} Accuracy: {fold_accuracy:.4f}")

# Mean of the collected fold accuracies
average_accuracy = np.asarray(accuracies).mean()
print(f"\nAverage Accuracy over {K} folds: {average_accuracy:.4f}")



=== Summary ===
Fold 1 Accuracy: 1.0000
Fold 2 Accuracy: 0.9722
Fold 3 Accuracy: 1.0000
Fold 4 Accuracy: 0.9714
Fold 5 Accuracy: 1.0000

Average Accuracy over 5 folds: 0.9887
