In [31]:
import pandas as pd
import numpy as np

# Load columns B:E of Sheet1 from the uploaded Excel workbook.
# NOTE(review): the sheet is expected to hold a 217x4 numeric table - confirm
# against the workbook; nothing below depends on that exact shape.
df_numeric = pd.read_excel('data_numeric.xlsx', sheet_name='Sheet1', usecols="B:E")
df_numeric.columns = ['age', 'perfume_id', 'gender', 'profession_id']
# 1-based row identifier, used below to pick the seed rows.
df_numeric['id'] = np.arange(1, len(df_numeric) + 1)

features = ['age', 'perfume_id', 'gender', 'profession_id']
X = df_numeric[features].to_numpy()

# Rows (by 1-based id) whose feature values seed the centroids.
initial_ids = [5, 10, 15, 20, 25]
# Cluster labels run 1..k; k is derived from the seed list so the two can
# never drift apart when the seeds are edited.
cluster_ids = range(1, len(initial_ids) + 1)
# Number of assign/update passes to run.
n_iterations = 1

# Initial centroids: one row of feature values per seed id, in seed order.
centroids = (
    df_numeric.set_index('id')
              .loc[initial_ids, features]
              .to_numpy()
)

for _ in range(n_iterations):
    # Per (point, centroid) pair: sum of the signed feature differences.
    signed_sums = (X[:, None, :] - centroids[None, :, :]).sum(axis=2)
    # NOTE(review): this is |sum(diff)|, not sum(|diff|) (Manhattan) nor
    # Euclidean distance - differences of opposite sign cancel out. Kept as-is
    # so existing results are reproduced; confirm this measure is intended.
    dists = np.abs(signed_sums)
    # Assign each point to the centroid with the smallest value (labels 1..k).
    labels = np.argmin(dists, axis=1) + 1
    df_numeric['cluster'] = labels
    # Update centroids as the mean of their assigned points. A cluster that
    # received no points keeps its previous centroid: leaving its row as NaN
    # would make argmin pick that cluster for every point on the next pass.
    previous = pd.DataFrame(centroids, index=cluster_ids, columns=features)
    centroids = (
        df_numeric.groupby('cluster')[features]
                  .mean()
                  .reindex(cluster_ids)
                  .fillna(previous)
                  .to_numpy()
    )

# Per-cluster feature means (only clusters that actually received points),
# with columns renamed to the labels used in the report output.
means = df_numeric.groupby('cluster')[features].mean().rename(columns={
    'age': 'usia',
    'perfume_id': 'nama_parfum_numeric',
    'gender': 'jeniskelamin_numeric',
    'profession_id': 'profesi_numeric'
})

print("=== Result ===")
print(means)
print("\n=== Rounded (no decimals) ===")
print(means.round(0).astype(int))


=== Result ===
              usia  nama_parfum_numeric  jeniskelamin_numeric  profesi_numeric
cluster                                                                       
1        50.750000             1.321429              0.285714         1.678571
2        57.833333             1.777778              0.388889         2.444444
3        21.511628             1.627907              0.480620         2.930233
4        44.000000             1.416667              0.666667         2.416667
5        37.533333             1.666667              0.433333         1.766667

=== Rounded (no decimals) ===
         usia  nama_parfum_numeric  jeniskelamin_numeric  profesi_numeric
cluster                                                                  
1          51                    1                     0                2
2          58                    2                     0                2
3          22                    2                     0                3
4          44                  