# PAM 기반 군집분석

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from pyclustering.cluster.kmedoids import kmedoids
from pyclustering.utils import distance_metric, type_metric
import matplotlib.pyplot as plt
from pyclustering.cluster import cluster_visualizer

In [None]:
# Build the example dataset: twelve labelled rows with two numeric features.
feature_columns = ['Feature1', 'Feature2']
data = {
    'Country': list('ABCDEFGHIJKL'),
    'Feature1': [12, 34, 67, 10, 30, 10, 10, 10, 20, 40, 70, 15],
    'Feature2': [23, 45, 78, 12, 35, 11, 9, 12, 22, 43, 72, 17],
}
df = pd.DataFrame(data)

# Standardize the two feature columns. The fitted scaler is kept in `scaler`
# and the standardized values in `scaled_data` for the later cells.
scaler = StandardScaler()
scaler.fit(df[feature_columns])
scaled_data = scaler.transform(df[feature_columns])

In [None]:
# PAM (k-medoids) clustering on the standardized features.
# The first `n_clusters` row indices are used as the starting medoids;
# adjust this to suit the number of data points.
n_clusters = 5
initial_medoids = list(range(n_clusters))
metric = distance_metric(type_metric.EUCLIDEAN)

kmedoids_instance = kmedoids(
    scaled_data,
    initial_medoids,
    metric=metric,
)
kmedoids_instance.process()

# Each entry of `clusters` is used below as a list of row indices
# into scaled_data.
clusters = kmedoids_instance.get_clusters()

In [None]:
# Visualize the clustering result.
plt.figure(figsize=(12, 6))

# Draw one scatter series per cluster; every cluster is indexed back into
# scaled_data to recover the coordinates of its members.
for cluster_id, member_indices in enumerate(clusters):
    cluster_data = np.array([scaled_data[idx] for idx in member_indices])
    xs = cluster_data[:, 0]
    ys = cluster_data[:, 1]
    plt.scatter(xs, ys, label=f'Cluster {cluster_id + 1}')

# Overlay the medoids as red crosses. They are drawn after the clusters so
# they sit on top and appear last in the legend.
medoids = kmedoids_instance.get_medoids()
medoid_data = np.array([scaled_data[medoid] for medoid in medoids])
medoid_x, medoid_y = medoid_data[:, 0], medoid_data[:, 1]
plt.scatter(
    medoid_x,
    medoid_y,
    color='red',
    marker='x',
    s=100,
    label='Medoids',
)

# Axis labels, title and legend, then render the figure.
plt.xlabel('Feature1 (scaled)')
plt.ylabel('Feature2 (scaled)')
plt.title('PAM Clustering (K-Medoids)')
plt.legend()
plt.show()