#  K-Means Clustering - Machine Learning

## Step 1: Import Libraries

In [None]:
# Ignore warnings
# NOTE: no category or module is given, so this filter silences every
# warning (including deprecation notices from the libraries imported below)
# unless a later filter overrides it.
import warnings
warnings.filterwarnings("ignore")

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

## Step 2: Load and Prepare Data

In [None]:
# Read the cleaned dataset (path is relative to the working directory)
# into a DataFrame.
data = pd.read_csv(filepath_or_buffer="cleaned_final_data.csv")

In [None]:
# Clustering is unsupervised, so the target label column is dropped and only
# the remaining feature columns are kept.
data_clustering = data.drop("market_value_category", axis=1)

# Put every feature on a comparable scale before distance-based clustering:
# learn the per-column statistics, then apply them to the same data.
scaler = StandardScaler()
data_scaled = scaler.fit(data_clustering).transform(data_clustering)

print(f"Dataset shape after removing labels: {data_clustering.shape}")

## Step 3: Finding Optimal k (Elbow Method)

In [None]:
# Find the optimal k using the Elbow Method.
# For each candidate k, fit K-Means on the standardized features and record
# the inertia (within-cluster sum of squares). The point where the curve
# stops dropping sharply (the "elbow") suggests a reasonable cluster count.
wcss = []  # Within-Cluster Sum of Squares, one entry per k in k_range
k_range = range(1, 11)

for k in k_range:
    # A fixed random_state keeps the curve reproducible between runs;
    # n_init=10 reruns the initialization and keeps the lowest-inertia fit.
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    # Fit on the standardized array produced by the scaler in Step 2
    # (the original referenced an undefined name, `df_scaled`).
    kmeans.fit(data_scaled)
    wcss.append(kmeans.inertia_)

# Plot the Elbow Method
plt.figure(figsize=(8, 4))
plt.plot(k_range, wcss, marker='o', linestyle='--')
plt.xlabel("Number of Clusters (k)")
plt.ylabel("WCSS (Within-Cluster Sum of Squares)")
plt.title("Elbow Method to Find Optimal k")
plt.show()

## Step 4: Evaluating Clustering Quality (Silhouette Score)