# In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import silhouette_score

# Compare clustering methods by silhouette score: load each method's saved
# labels, score them against the scaled feature matrix, then print and plot
# the results.

# Load cluster labels from all methods
dbscan_labels = pd.read_csv('../outputs/clusters/dbscan_labels.csv')
agnes_labels = pd.read_csv('../outputs/clusters/agnes_labels.csv')  # From Phase 1
kmeans_labels = pd.read_csv('../outputs/clusters/kmeans_labels.csv')  # From Phase 1

# Load scaled data
X_scaled = pd.read_csv('../outputs/scaled_data.csv')

# Compare Silhouette Scores
methods = {
    'DBSCAN': dbscan_labels,
    'AGNES': agnes_labels,
    'K-Means': kmeans_labels
}

# Colour per method, keyed by name so a method keeps its colour even when
# another method is skipped below.
method_colors = {
    'DBSCAN': '#8A2BE2',
    'AGNES': '#DA70D6',
    'K-Means': '#FF69B4'
}

scores = {}
for name, label_frame in methods.items():
    # read_csv returns a DataFrame; iterating it (e.g. set(df)) yields column
    # names, not label values, so pull out the label column as a 1-D Series.
    # NOTE(review): assumes the labels live in the LAST column of each CSV
    # (covers both a single-column file and an index+label file) -- confirm
    # against the code that wrote these files.
    labels = label_frame.iloc[:, -1]

    # NOTE(review): if the DBSCAN file marks noise points as -1, they are
    # scored here as if they formed one cluster -- confirm that is intended.
    if labels.nunique() > 1:  # At least 2 clusters needed
        scores[name] = silhouette_score(X_scaled, labels)
    else:
        print(f"{name}: skipped (fewer than 2 clusters)")

# Print results
print("Silhouette Scores:")
for method, score in scores.items():
    print(f"{method}: {score:.2f}")

# Plot scores
plot_path = Path('../outputs/plots/silhouette_scores.png')
# savefig does not create missing directories.
plot_path.parent.mkdir(parents=True, exist_ok=True)

plt.bar(
    list(scores),
    list(scores.values()),
    color=[method_colors[name] for name in scores],
)
plt.title('Comparison of Clustering Methods')
plt.ylabel('Silhouette Score')
plt.savefig(plot_path)
plt.show()