In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

# Ten-row toy dataset: two numeric feature columns plus a 0/1 label column
data = dict(
    Age=[22, 25, 47, 52, 46, 56, 55, 60, 62, 63],
    Income=[20, 22, 50, 60, 55, 80, 85, 90, 95, 100],
    Purchased=[0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
)
df = pd.DataFrame(data)

# Name the columns once so the feature/target split is explicit
feature_columns = ['Age', 'Income']
target_column = 'Purchased'

X = df[feature_columns]  # feature matrix (DataFrame)
y = df[target_column]    # target vector (Series)

In [12]:
# Candidate classifiers to compare, keyed by the name used in the printed report
models = {
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(n_neighbors=3),
}
# NOTE(review): features are passed to the models unscaled; KNN is distance-based,
# so consider whether Age/Income should be standardized first — confirm intent.

# Stratified 2-fold splitter; shuffling uses a fixed seed so the folds are reproducible
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Maps model name -> mean cross-validated accuracy over the folds
results = {}
for name, model in models.items():
    cv_scores = cross_val_score(model, X, y, cv=skf, scoring='accuracy')
    mean_accuracy = cv_scores.mean()  # computed once, reused for storage and display
    results[name] = mean_accuracy
    print(f"{name} Mean Accuracy: {mean_accuracy:.4f}")

Logistic Regression Mean Accuracy: 1.0000
KNN Mean Accuracy: 0.8000


In [13]:
# Pick the (name, score) pair with the highest mean accuracy;
# on a tie, max() keeps the first entry in insertion order.
best_model, best_accuracy = max(results.items(), key=lambda entry: entry[1])
print(f"\nBest Model: {best_model} with Accuracy: {best_accuracy:.4f}")


Best Model: Logistic Regression with Accuracy: 1.0000
