# Clustering - Proje: Müşteri Segmentasyonu

Bu proje, müşteri verilerini ölçeklendirip K-Means algoritmasıyla kümeleyerek müşteri segmentasyonu yapar; küme sayısı (K), silhouette skoruna göre seçilir.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.datasets import make_blobs

# Use Matplotlib's bundled 'seaborn-v0_8' style sheet for all figures.
plt.style.use('seaborn-v0_8')
# Make seaborn's "husl" palette the default color cycle.
sns.set_palette("husl")
# IPython magic (valid only inside a notebook/IPython session): draw figures inline.
%matplotlib inline


In [None]:
# Synthetic stand-in for customer data (swap in a real dataset for an actual project).
feature_names = [
    'Age',
    'Annual_Income',
    'Spending_Score',
    'Purchase_Frequency',
    'Avg_Transaction_Value',
]
X, _ = make_blobs(n_samples=1000, n_features=5, centers=4, random_state=42)
df = pd.DataFrame(data=X, columns=feature_names)

print("Veri seti:")
print(df.head())
print(f"\nVeri seti boyutu: {df.shape}")

# Feature scaling: K-Means is distance based, so bring every column onto a
# comparable scale before clustering.
scaler = StandardScaler()
X_scaled = scaler.fit(df).transform(df)

# Search for the number of clusters: fit K-Means once per candidate K and
# record the silhouette score of the resulting labelling.
k_range = range(2, 11)
silhouette_scores = []

for k in k_range:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = kmeans.fit(X_scaled).labels_
    sil_score = silhouette_score(X_scaled, labels)
    silhouette_scores.append(sil_score)
    print(f"K={k}: Silhouette Score = {sil_score:.4f}")

# Best K: the candidate with the highest silhouette score
# (np.argmax returns the first maximum when several tie).
best_position = int(np.argmax(silhouette_scores))
best_k = k_range[best_position]
print(f"\nEn iyi K değeri: {best_k}")

# Final clustering with the selected K; the labels are stored on the DataFrame.
kmeans_final = KMeans(n_clusters=best_k, n_init=10, random_state=42)
kmeans_final.fit(X_scaled)
df['Cluster'] = kmeans_final.labels_

# Number of customers per cluster, ordered by cluster id.
cluster_counts = df['Cluster'].value_counts().sort_index()
print(f"\nCluster dağılımı:\n{cluster_counts}")
