# KNN - Proje: Optimal K Değeri Bulma

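Bu proje, 5 katlı çapraz doğrulama (cross-validation) kullanarak KNN için optimal K değerini 1–30 aralığında arar ve bu K ile eğitilen modelin performansını test seti üzerinde değerlendirir.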


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Global plotting configuration for the figures drawn later in this notebook.
# Select the 'seaborn-v0_8' matplotlib style sheet.
# NOTE(review): this style name presumably requires a recent matplotlib
# release -- confirm against the installed version.
plt.style.use('seaborn-v0_8')
# Use seaborn's "husl" palette as the default color palette.
sns.set_palette("husl")
%matplotlib inline


## 1. Veri Hazırlama


In [None]:
# Synthetic classification data set: 2000 samples with 15 features
# (12 informative, 3 redundant), generated with a fixed seed.
X, y = make_classification(
    n_samples=2000,
    n_features=15,
    n_informative=12,
    n_redundant=3,
    random_state=42,
)

# Hold out 20% of the samples as the test split, stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Feature scaling (critical for KNN): the scaler is fitted on the training
# split only and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Eğitim seti boyutu: {X_train_scaled.shape}")
print(f"Test seti boyutu: {X_test_scaled.shape}")


## 2. Optimal K Değerini Bulma


In [None]:
# Cross-validate every K in 1..30 on the training split and keep the mean
# accuracy of the 5 folds for each K.
#
# Scaling is done inside a pipeline on the raw X_train rather than reusing
# X_train_scaled: that array was standardized with statistics computed from
# the whole training split, so every validation fold would already have
# influenced the preprocessing. With the pipeline, cross_val_score refits
# the StandardScaler on the training part of each fold only.
k_range = range(1, 31)
cv_scores = []

for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    fold_model = make_pipeline(StandardScaler(), knn)
    scores = cross_val_score(fold_model, X_train, y_train, cv=5, scoring='accuracy')
    cv_scores.append(scores.mean())

# Pick the K with the highest mean CV accuracy. np.argmax returns the first
# maximum, so ties resolve to the smallest K.
best_index = int(np.argmax(cv_scores))
best_k = k_range[best_index]
print(f"En iyi K değeri: {best_k}")
print(f"En iyi CV Accuracy: {cv_scores[best_index]:.4f}")

# Plot mean CV accuracy against K and mark the selected K.
plt.figure(figsize=(12, 6))
plt.plot(k_range, cv_scores, marker='o', linewidth=2, markersize=8)
plt.axvline(x=best_k, color='r', linestyle='--', label=f'Optimal K = {best_k}')
plt.xlabel('K Değeri')
plt.ylabel('Cross-Validation Accuracy')
plt.title('K Değeri vs Model Performansı')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()


## 3. Final Model ve Değerlendirme


In [None]:
# Train the final classifier with the selected K on the scaled training
# split, then predict labels for the scaled test split.
knn_optimal = KNeighborsClassifier(n_neighbors=best_k).fit(X_train_scaled, y_train)
y_pred = knn_optimal.predict(X_test_scaled)

# Report test-set metrics: accuracy, confusion matrix and the per-class
# classification report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"\nConfusion Matrix:\n{confusion_matrix(y_test, y_pred)}")
print(f"\nClassification Report:\n{classification_report(y_test, y_pred)}")
