In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Customer segmentation: load the customer table, standardize three numeric
# features, assign each row to a K-Means cluster, and write the labelled
# table back out as CSV.

# Load the dataset
df = pd.read_csv("customer_segmentation.csv")  # Replace with your actual CSV file

# Select relevant features for clustering
features = ['Age', 'Annual_Income', 'Spending_Score']

# Ensure selected features exist in the dataset
for feature in features:
    if feature not in df.columns:
        raise KeyError(f"Column '{feature}' not found in dataset!")

X = df[features]

# Fail early on missing values. StandardScaler passes NaNs through its
# transform, so without this check the run only fails later inside KMeans with
# a generic "Input X contains NaN" ValueError that does not say which columns
# are affected. The exception type is kept as ValueError.
missing_counts = X.isna().sum()
if missing_counts.any():
    missing_by_column = {
        column: int(count) for column, count in missing_counts.items() if count > 0
    }
    raise ValueError(
        f"Missing values found in clustering features: {missing_by_column}. "
        "Impute or drop these rows before clustering."
    )

# Standardize the data so each feature has zero mean and unit variance;
# otherwise the feature with the largest numeric range dominates the
# Euclidean distances K-Means relies on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply K-Means Clustering
# NOTE: the cluster count is hard-coded; no elbow-method selection is performed
# in this cell. Re-tune it for your data (e.g. by comparing `kmeans.inertia_`
# across several candidate values).
num_clusters = 4
# random_state pins the centroid initialisation so labels are reproducible;
# n_init=10 is set explicitly rather than relying on the library default.
kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
df['Cluster'] = kmeans.fit_predict(X_scaled)

# Save the segmented data (original columns plus the new 'Cluster' label)
df.to_csv("customer_segmented_dataset.csv", index=False)

# Print the cluster distribution
print(df['Cluster'].value_counts())
print("Customer segmentation completed and saved as 'customer_segmented_dataset.csv'.")



Cluster
2    6
1    6
0    5
3    3
Name: count, dtype: int64
Customer segmentation completed and saved as 'customer_segmented_dataset.csv'.
