In [8]:
from sklearn.cluster import KMeans
import pandas as pd

# Build the input table: five observed prices for each of three dairy products
data = pd.DataFrame({
    'Product': ['Butter'] * 5 + ['Cheese'] * 5 + ['Yogurt'] * 5,
    'Price': [
        8.00, 12.00, 14.00, 16.00, 18.00,   # Butter
        5.00, 6.00, 8.00, 10.00, 12.00,     # Cheese
        2.00, 2.50, 3.00, 4.00, 5.00,       # Yogurt
    ],
})

# Store the product names as a categorical column
data['Product'] = data['Product'].astype('category')

# Number of price clusters to form within each product
k = 3

# Distinct products, in order of first appearance
products = data['Product'].unique()

# Cluster each product's prices separately and label the clusters by ascending
# average price ('P1' = cheapest cluster). The per-product frames are collected
# in a list and concatenated once after the loop, which avoids repeatedly
# copying a growing frame and avoids concatenating onto an empty object-dtype
# frame (which can degrade the 'Price' dtype).
labelled_frames = []

# Loop over each product and cluster it separately
for product in products:
    # Keep only the Price column for the current product; the original row
    # index is preserved so rows can be traced back to `data`
    product_data = data.loc[data['Product'] == product, ['Price']].copy()

    # K-means cannot form more clusters than there are distinct prices
    n_clusters = min(k, product_data['Price'].nunique())

    # A fixed random_state and an explicit n_init make the assignments
    # reproducible across runs and across scikit-learn versions
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)

    # Fit the model and get the cluster label of each data point in one step
    labels = kmeans.fit_predict(product_data)

    # Rename the clusters based on average price: lowest mean -> 'P1'
    means = product_data.groupby(labels)['Price'].mean().sort_values()
    cluster_map = {cluster: f'P{rank + 1}' for rank, cluster in enumerate(means.index)}

    # Add the readable labels and the product name, then fix the column order
    product_data['Cluster'] = [cluster_map[label] for label in labels]
    product_data['Product'] = product
    labelled_frames.append(product_data[['Product', 'Price', 'Cluster']])

# Combine all products; fall back to an empty frame with the expected columns
# when there was nothing to cluster
results = (
    pd.concat(labelled_frames)
    if labelled_frames
    else pd.DataFrame(columns=['Product', 'Price', 'Cluster'])
)


In [9]:
# Display the combined table: one row per price point with its product and cluster label
results

Unnamed: 0,Product,Price,Cluster
0,Butter,8.0,P1
1,Butter,12.0,P2
2,Butter,14.0,P2
3,Butter,16.0,P3
4,Butter,18.0,P3
5,Cheese,5.0,P1
6,Cheese,6.0,P1
7,Cheese,8.0,P2
8,Cheese,10.0,P2
9,Cheese,12.0,P3
