In [7]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Read the product data from disk
df = pd.read_csv("product_recommendation.csv")

# Show which columns the file provides
available_columns = df.columns.tolist()
print("Available Columns:", available_columns)

# Columns used as clustering inputs
selected_features = [
    'Product_Sales',
    'Product_Rating',
    'Customer_Review',
    'Product_Quality',
]

# Stop at the first requested column that the dataset lacks
first_missing = next(
    (name for name in selected_features if name not in df.columns),
    None,
)
if first_missing is not None:
    raise KeyError(f"Column '{first_missing}' not found in dataset!")

X = df[selected_features]

# Standardize the data so that features measured on very different scales
# contribute comparably to the distance computations used by K-Means.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Determine the optimal number of clusters using the Elbow Method:
# fit K-Means for each candidate k and record the within-cluster sum of
# squares (the fitted model's `inertia_`).
# KMeans raises ValueError when n_clusters exceeds the number of samples, so
# the sweep is capped at the row count; with 10 or more rows it still runs
# k = 1..10 exactly as before.
# NOTE(review): `wcss` is collected but never plotted or printed in the
# visible code — inspect it before relying on a hard-coded cluster count.
max_k = min(10, X_scaled.shape[0])
wcss = []
for i in range(1, max_k + 1):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42, n_init=10)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

# Fit the final K-Means model
optimal_k = 5  # cluster count picked from the elbow analysis
kmeans = KMeans(
    n_clusters=optimal_k,
    init='k-means++',
    random_state=42,
    n_init=10,
)
segment_labels = kmeans.fit_predict(X_scaled)
df['Customer_Segment'] = segment_labels

# Write the labelled rows to disk, omitting the index column
df.to_csv("customer_segments.csv", index=False)

# Preview the first rows together with their assigned segment
preview = df.head()
print(preview)

Available Columns: ['Product_Sales', 'Product_Rating', 'Customer_Review', 'Product_Quality']




   Product_Sales  Product_Rating  Customer_Review  Product_Quality  \
0            183             2.6                5                9   
1           1524             2.1                8                3   
2           3353             2.9                9                9   
3           7232             1.9                1                6   
4           4269             3.4                4                1   

   Customer_Segment  
0                 0  
1                 0  
2                 2  
3                 3  
4                 4  
