In [16]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix, accuracy_score, adjusted_rand_score, silhouette_score
import numpy as np
from scipy.stats import mode

# Toy dataset: 20 people with an age, an income and a binary reference label.
# Column order (ID, Age, Income, True_Label) is the order the DataFrame built
# from this mapping will use.
data = dict(
    # Sequential identifiers 1..20.
    ID=list(range(1, 21)),
    Age=[
        25, 45, 29, 35, 40,
        36, 50, 28, 30, 42,
        23, 34, 48, 33, 26,
        39, 53, 37, 32, 45,
    ],
    Income=[
        50000, 64000, 58000, 52000, 61000,
        71000, 80000, 67000, 64000, 72000,
        55000, 61000, 70000, 65000, 58000,
        72000, 81000, 69000, 63000, 75000,
    ],
    # Reference class per row; used only to evaluate the clustering.
    True_Label=[
        0, 1, 0, 1, 1,
        1, 0, 0, 0, 1,
        0, 1, 0, 1, 0,
        1, 0, 1, 0, 1,
    ],
)

def map_clusters_to_labels(true_labels, clusters):
    """Relabel every cluster with the most common true label among its members.

    Cluster ids produced by a clustering algorithm are arbitrary, so they
    cannot be compared with reference labels directly. This maps each cluster
    id to the majority reference label of the points assigned to it.

    Parameters
    ----------
    true_labels : array-like
        Reference label for each sample.
    clusters : array-like
        Cluster id for each sample; must have the same shape as
        ``true_labels``. Any number of distinct ids is supported.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``clusters`` with the dtype of ``true_labels``,
        holding the majority label of each sample's cluster. When several
        labels are tied within a cluster, the smallest one is used.

    Raises
    ------
    ValueError
        If ``true_labels`` and ``clusters`` have different shapes.
    """
    true_labels = np.asarray(true_labels)
    clusters = np.asarray(clusters)
    if true_labels.shape != clusters.shape:
        raise ValueError(
            "true_labels and clusters must have the same shape, got "
            f"{true_labels.shape} and {clusters.shape}"
        )

    # Allocate with the label dtype: the result holds labels, not cluster ids.
    labels = np.empty(clusters.shape, dtype=true_labels.dtype)

    # Iterate over the ids actually present instead of assuming exactly {0, 1};
    # this also guarantees every mask selects at least one sample.
    for cluster_id in np.unique(clusters):
        mask = clusters == cluster_id
        values, counts = np.unique(true_labels[mask], return_counts=True)
        # `values` is sorted and argmax returns the first maximum, so ties
        # resolve to the smallest label.
        labels[mask] = values[np.argmax(counts)]
    return labels

In [17]:
# Build the table and select the two features used for clustering.
df = pd.DataFrame(data)
X = df.loc[:, ['Age', 'Income']]

# Fit a two-cluster k-means model with a fixed seed, then store the cluster
# assigned to every row alongside the original columns.
kmeans = KMeans(n_clusters=2, random_state=42).fit(X)
df['Cluster'] = kmeans.predict(X)

# Cluster ids are arbitrary, so translate them into reference labels before
# computing the label-based metrics.
mapped_clusters = map_clusters_to_labels(
    df['True_Label'].to_numpy(),
    df['Cluster'].to_numpy(),
)

# Metrics that compare against the reference labels. The confusion matrix and
# accuracy use the mapped labels; ARI is computed on the raw cluster ids.
conf_matrix = confusion_matrix(df['True_Label'], mapped_clusters)
accuracy = accuracy_score(df['True_Label'], mapped_clusters)
ari = adjusted_rand_score(df['True_Label'], df['Cluster'])

# Metric computed from the features and cluster assignments only.
silhouette = silhouette_score(X, df['Cluster'])

In [18]:
# Per-row view: inputs, reference label and assigned cluster.
report_columns = ['ID', 'Age', 'Income', 'True_Label', 'Cluster']
print("Cluster assignments:")
print(df.loc[:, report_columns])

# Confusion matrix of reference labels against mapped cluster labels.
print("\nConfusion Matrix:")
print(conf_matrix)

# Scalar summary metrics computed in the previous cell.
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Adjusted Rand Index (ARI): {ari:.2f}")
print(f"Silhouette Score: {silhouette:.2f}")

Cluster assignments:
    ID  Age  Income  True_Label  Cluster
0    1   25   50000           0        1
1    2   45   64000           1        1
2    3   29   58000           0        1
3    4   35   52000           1        1
4    5   40   61000           1        1
5    6   36   71000           1        0
6    7   50   80000           0        0
7    8   28   67000           0        0
8    9   30   64000           0        1
9   10   42   72000           1        0
10  11   23   55000           0        1
11  12   34   61000           1        1
12  13   48   70000           0        0
13  14   33   65000           1        1
14  15   26   58000           0        1
15  16   39   72000           1        0
16  17   53   81000           0        0
17  18   37   69000           1        0
18  19   32   63000           0        1
19  20   45   75000           1        0

Confusion Matrix:
[[6 4]
 [5 5]]
Accuracy: 55.00%
Adjusted Rand Index (ARI): -0.04
Silhouette Score: 0.55
