In [16]:
import pandas as pd
import numpy as np

# Load Sheet1 of the uploaded Excel file containing the 217×4 numeric table
df_numeric = pd.read_excel('data_numeric.xlsx', sheet_name='Sheet1', usecols="B:E")
# Ensure columns match: usia, Nama parfum_numeric, jeniskelamin_numeric, profesi_numeric
df_numeric.columns = ['age', 'perfume_id', 'gender', 'profession_id']

# Add an 'id' column from Excel row numbers 1–217
df_numeric['id'] = np.arange(1, len(df_numeric) + 1)

# Extract features array and pick initial centroids from IDs 5,10,15,20,25
features = ['age', 'perfume_id', 'gender', 'profession_id']
X = df_numeric[features].to_numpy()  # shape (217,4)

initial_ids = [5, 10, 15, 20, 25]
centroids = (
    df_numeric.set_index('id')
              .loc[initial_ids, features]
              .to_numpy()
)

# Compute one-iteration distances as in your Excel: abs(sum of signed diffs)
raw_diffs = (X[:, None, :] - centroids[None, :, :]).sum(axis=2)
dists    = np.abs(raw_diffs)

# Assign each record to nearest cluster
labels = np.argmin(dists, axis=1) + 1

# Compute the new centroids (first pass) by averaging the 217 records
df_numeric['cluster'] = labels
means = df_numeric.groupby('cluster')[features].mean().rename(columns={
    'age': 'usia',
    'perfume_id': 'nama_parfum_numeric',
    'gender': 'jeniskelamin_numeric',
    'profession_id': 'profesi_numeric'
})

# Display exactly your Sheet 1 results
print("=== Sheet 1 centroids (1st iteration, full precision) ===")
print(means)

print("\n=== Rounded to 6 decimals (for comparison) ===")
print(means.round())

=== Sheet 1 centroids (1st iteration, full precision) ===
              usia  nama_parfum_numeric  jeniskelamin_numeric  profesi_numeric
cluster                                                                       
1        50.750000             1.321429              0.285714         1.678571
2        57.833333             1.777778              0.388889         2.444444
3        21.511628             1.627907              0.480620         2.930233
4        44.000000             1.416667              0.666667         2.416667
5        37.533333             1.666667              0.433333         1.766667

=== Rounded to 6 decimals (for comparison) ===
         usia  nama_parfum_numeric  jeniskelamin_numeric  profesi_numeric
cluster                                                                  
1        51.0                  1.0                   0.0              2.0
2        58.0                  2.0                   0.0              2.0
3        22.0                  2.0           