In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Tune a k-nearest-neighbours classifier on a synthetic binary dataset with
# 5-fold cross-validated grid search, then report held-out test accuracy.

# Step 1: Create synthetic dataset
# make_classification raises ValueError unless
# n_informative + n_redundant + n_repeated <= n_features (here 2 + 0 + 0 <= 3).
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=42,  # fixed seed so the dataset is reproducible
)

# Step 2: Train-test split (80% train / 20% test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 3: Define parameter grid
# Every combination of k (1..10) and neighbour-search algorithm is tried:
# 10 x 3 = 30 candidates.
# NOTE(review): 'ball_tree', 'kd_tree' and 'brute' look like different ways of
# finding the same exact neighbours, so this axis probably affects speed rather
# than accuracy -- confirm, and consider dropping it to cut the search cost.
param_grid = {
    'n_neighbors': list(range(1, 11)),
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
}

# Step 4: Create GridSearchCV object (5-fold cross-validation per candidate)
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)

# Step 5: Fit the model (cross-validates every candidate on the training set)
grid.fit(X_train, y_train)

# Step 6: Get best parameters and score
# best_params_ holds every tuned key ('algorithm' as well as 'n_neighbors'),
# so the first line prints the whole dict, not just k.
print("✅ Best k value (n_neighbors):", grid.best_params_)
print("✅ Best cross-validation accuracy:", round(grid.best_score_ * 100, 2), "%")

# Step 7: Evaluate on test data
# The test split was never seen during the search, so this is a held-out score.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
print("✅ Test accuracy:", round(accuracy_score(y_test, y_pred) * 100, 2), "%")

# Output recorded from a previous run:
# ✅ Best k value (n_neighbors): {'algorithm': 'ball_tree', 'n_neighbors': 5}
# ✅ Best cross-validation accuracy: 90.38 %
# ✅ Test accuracy: 87.5 %


✅ Best k value (n_neighbors): {'algorithm': 'ball_tree', 'n_neighbors': 5}
✅ Best cross-validation accuracy: 90.38 %
✅ Test accuracy: 87.5 %
