In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler

# --- Create Sample Data ---
# Ten-row toy dataset: one nominal column, two ordinal columns, one numeric
# column and a three-class target column.
data = dict(
    A_Nominal=['Red', 'Blue', 'Green', 'Red', 'Blue', 'Green', 'Red', 'Blue', 'Green', 'Red'],
    B_Ordinal=['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low'],
    C_Ordinal=['Small', 'Medium', 'Large', 'Large', 'Medium', 'Small', 'Small', 'Medium', 'Large', 'Large'],
    Numerical_Feature=[10.5, 22.1, 5.0, 15.3, 30.0, 8.8, 12.0, 25.5, 6.2, 18.1],
    T_Target=['Class_Y', 'Class_Z', 'Class_Y', 'Class_X', 'Class_Z', 'Class_X', 'Class_Y', 'Class_Z', 'Class_X', 'Class_Y'],
)
df = pd.DataFrame(data)

# Show the first rows as the cell output.
df.head()


In [None]:
# Ordinal columns: replace each level name by its integer rank (0, 1, 2).
ordinal_map_B = dict(Low=0, Medium=1, High=2)
ordinal_map_C = dict(Small=0, Medium=1, Large=2)

# The encoded values are stored as new columns next to the original ones.
df['B_Ordinal_Encoded'] = df['B_Ordinal'].map(ordinal_map_B)
df['C_Ordinal_Encoded'] = df['C_Ordinal'].map(ordinal_map_C)

# Nominal column: expand into one indicator column per distinct value.
# The 'A' prefix marks the resulting columns as derived from A_Nominal.
df_A_one_hot = pd.get_dummies(df['A_Nominal'], prefix='A')
df_A_one_hot.head()

In [None]:
# Define the features
# The numeric column and the two rank-encoded ordinal columns come first; the
# one-hot columns for A_Nominal are appended to their right.
feature_cols = ['Numerical_Feature', 'B_Ordinal_Encoded', 'C_Ordinal_Encoded']
X_processed = pd.concat([df[feature_cols], df_A_one_hot], axis=1)

# Turn into numpy array
# Cast explicitly to float: when the dummy columns are boolean (the default in
# recent pandas releases), mixing them with numeric columns would otherwise
# produce an object-dtype array instead of a numeric one.
X = X_processed.to_numpy(dtype=float)

In [None]:
# Labels transformation
# Encode the three class names as consecutive integers starting at 0.
target_map = {
    label: code
    for code, label in enumerate(['Class_X', 'Class_Y', 'Class_Z'])
}
y = df['T_Target'].map(target_map)

# Report the dimensions of the feature matrix and of the label vector.
print("Feature X Shape:", X.shape)
print("Label y Shape:", y.shape)

In [None]:
# Cross-validation setup: a seeded, shuffled K-fold splitter and an empty list
# that collects one validation accuracy per fold.
K = 3  # Number of folds
all_val_scores = list()
kf = KFold(
    n_splits=K,
    shuffle=True,
    random_state=42,
)

In [None]:
# Initialize a simple classification model
# One-vs-rest logistic regression using the liblinear solver. The explicit
# OneVsRestClassifier wrapper replaces the `multi_class='ovr'` argument, which
# recent scikit-learn releases deprecate; the one-vs-rest scheme is kept.
model = OneVsRestClassifier(
    LogisticRegression(solver='liblinear', random_state=42)
)


In [None]:
# Start from an empty score list so that re-running this cell does not append
# a second set of fold scores to the results of an earlier run.
all_val_scores = []

# The indices produced by kf.split() are row positions in X, so the labels are
# sliced by position as well (a pandas Series would be indexed by label).
y_values = np.asarray(y)

# Loop through each split generated by kf.split()
for fold, (train_index, val_index) in enumerate(kf.split(X)):
    print(f"--- Fold {fold + 1}/{K} ---")

    # Split data into training and validation sets for the current fold
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y_values[train_index], y_values[val_index]

    print(f"Train Size: {len(X_train)} | Validation Size: {len(X_val)}")

    # **TRAIN** the model on this fold's training rows
    model.fit(X_train, y_train)

    # **VALIDATE** the model on the held-out rows and record the accuracy
    y_pred = model.predict(X_val)
    fold_accuracy = accuracy_score(y_val, y_pred)
    all_val_scores.append(fold_accuracy)

    print(f"Fold {fold + 1} Validation Accuracy: {fold_accuracy:.4f}\n")

In [None]:
# Summarise the cross-validation run: mean and standard deviation of the
# per-fold validation accuracies, printed below a separator line.
print("-" * 40)
mean_score, std_score = np.mean(all_val_scores), np.std(all_val_scores)

print(f"All {K} Fold Validation Accuracies: {all_val_scores}")
print(f"Average Validation Accuracy: {mean_score:.4f} (+/- {std_score:.4f})")
