# Clustering - Temel Kavramlar

Bu notebook, kümeleme (clustering) algoritmalarının (K-Means, DBSCAN, Hierarchical) temel kavramlarını içerir.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
%matplotlib inline


## 1. K-Means Clustering


In [None]:
# Synthetic dataset: 300 samples with 2 features, generated around 4 centers.
X, y_true = make_blobs(n_samples=300, n_features=2, centers=4, random_state=42)

# Fit K-Means with 4 clusters and read the per-sample cluster labels
# from the fitted estimator.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42).fit(X)
y_pred = kmeans.labels_

# Silhouette score of the resulting assignment.
silhouette = silhouette_score(X, y_pred)
print(f"K-Means Silhouette Score: {silhouette:.4f}")

# Visualize: samples colored by assigned cluster, centroids as red crosses.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis', alpha=0.6)
ax.scatter(
    *kmeans.cluster_centers_.T,
    c='red', marker='x', s=200, linewidths=3, label='Centroids',
)
ax.set_title('K-Means Clustering')
ax.legend()
plt.show()


## 2. Optimal K Değerini Bulma (Elbow Method ve Silhouette Score)


In [None]:
# Elbow method: fit K-Means for every candidate K and record both the
# inertia and the silhouette score of each fit.
inertias = []
silhouette_scores = []
k_range = range(2, 11)

for k in k_range:
    # Use a dedicated name for the per-K model so the notebook-level `kmeans`
    # (the 4-cluster model fitted earlier) is not silently replaced by the
    # last model of this sweep.
    kmeans_k = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans_k.fit(X)
    inertias.append(kmeans_k.inertia_)
    silhouette_scores.append(silhouette_score(X, kmeans_k.labels_))

# Visualize both criteria side by side.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: inertia per K (look for the "elbow").
ax1.plot(k_range, inertias, marker='o', linewidth=2)
ax1.set_xlabel('K Değeri')
ax1.set_ylabel('Inertia')
ax1.set_title('Elbow Method')
ax1.grid(True, alpha=0.3)

# Right panel: silhouette score per K.
ax2.plot(k_range, silhouette_scores, marker='o', linewidth=2, color='green')
ax2.set_xlabel('K Değeri')
ax2.set_ylabel('Silhouette Score')
ax2.set_title('Silhouette Score vs K')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


## 3. DBSCAN Clustering


In [None]:
# DBSCAN (density-based clustering) on the same points used for K-Means.
dbscan = DBSCAN(eps=0.5, min_samples=5)
y_pred_dbscan = dbscan.fit_predict(X)

# DBSCAN marks noise samples with the label -1; they do not form a cluster,
# so they are counted separately and excluded from the cluster count.
noise_mask = y_pred_dbscan == -1
n_clusters = int(np.unique(y_pred_dbscan[~noise_mask]).size)
n_noise = int(noise_mask.sum())

print(f"DBSCAN Cluster Sayısı: {n_clusters}")
print(f"Noise Points: {n_noise}")

# Visualize: clustered samples are colored by label; noise samples are drawn
# as a separate gray series so they cannot be mistaken for another cluster.
plt.figure(figsize=(10, 6))
if n_clusters:
    plt.scatter(X[~noise_mask, 0], X[~noise_mask, 1],
                c=y_pred_dbscan[~noise_mask], cmap='viridis', alpha=0.6)
if n_noise:
    plt.scatter(X[noise_mask, 0], X[noise_mask, 1],
                c='gray', marker='x', alpha=0.6, label='Noise')
    plt.legend()
plt.title('DBSCAN Clustering')
plt.show()
