In [22]:
from sklearn.metrics import silhouette_score
from k_means import k_means_clustering
from hierarchical import agglomerative_clustering
import pandas as pd


In [48]:
def silhouette(path_to_featurevec_csv: str, clustering_csv_path: str = None, max_clusters: int = None, min_clusters: int = 2):
    """Compute silhouette scores for clusterings of a feature-vector CSV.

    The function has two modes, selected by ``clustering_csv_path``:

    * **Single clustering** (``clustering_csv_path`` given): the labels are
      read from that CSV and one silhouette score is returned.
    * **Sweep** (``clustering_csv_path`` omitted/empty): for every ``k`` in
      ``min_clusters..max_clusters`` (inclusive) both ``k_means_clustering``
      and ``agglomerative_clustering`` are run on the feature CSV and scored.

    In both modes the ``'method_name'`` column is dropped from every frame
    before use; all remaining feature columns form the sample matrix, and the
    remaining clustering values are flattened into a 1-D label array.

    Args:
        path_to_featurevec_csv: Path to the feature-vector CSV. Must contain a
            ``'method_name'`` column.
        clustering_csv_path: Optional path to a CSV holding a clustering. Must
            contain a ``'method_name'`` column.
            NOTE(review): rows are assumed to be in the same order as the
            feature CSV; nothing here aligns them by ``method_name`` - confirm.
        max_clusters: Largest ``k`` to evaluate in sweep mode (inclusive).
            Required in sweep mode, ignored otherwise.
        min_clusters: Smallest ``k`` to evaluate in sweep mode. Must be >= 2.

    Returns:
        Single-clustering mode: the value returned by ``silhouette_score``.
        Sweep mode: a tuple ``(kmean_dict, agglom_dict)``, each mapping ``k``
        to the silhouette score of the corresponding algorithm. Both dicts
        are empty when ``min_clusters > max_clusters``.

    Raises:
        ValueError: In sweep mode, if ``max_clusters`` is ``None`` or
            ``min_clusters`` is below 2.

    Note:
        Sweep mode calls both clustering helpers with ``save=True``;
        presumably each call writes its clustering to disk - verify against
        the helpers' implementations.
    """
    if not clustering_csv_path:
        # Validate the sweep bounds before touching the filesystem or running
        # any clustering, so a bad call fails fast with a clear message
        # instead of a TypeError from `None + 1` further down.
        if max_clusters is None:
            raise ValueError(
                'max_clusters is required when clustering_csv_path is not provided'
            )
        # The silhouette score needs at least 2 distinct labels; reject
        # smaller k up front rather than after clusterings have been run.
        if min_clusters < 2:
            raise ValueError(
                f'min_clusters must be at least 2, got {min_clusters}'
            )

    feature_matrix = pd.read_csv(path_to_featurevec_csv).drop('method_name', axis=1).values

    if clustering_csv_path:
        labels = pd.read_csv(clustering_csv_path).drop('method_name', axis=1).values.ravel()
        return silhouette_score(feature_matrix, labels)

    kmean_dict = {}
    agglom_dict = {}
    for k in range(min_clusters, max_clusters + 1):
        kmeans_labels = k_means_clustering(
            n_clusters=k, path_to_featurevec_csv=path_to_featurevec_csv, get=True, save=True
        ).drop('method_name', axis=1).values.ravel()
        kmean_dict[k] = silhouette_score(feature_matrix, kmeans_labels)

        agglom_labels = agglomerative_clustering(
            path_to_featurevec_csv=path_to_featurevec_csv, n_clusters=k, get=True, save=True
        ).drop('method_name', axis=1).values.ravel()
        agglom_dict[k] = silhouette_score(feature_matrix, agglom_labels)
    return kmean_dict, agglom_dict

def utility_print(kmean_dict, agglom_dict):
    """Print per-k scores for both algorithms, agglomerative first.

    Each mapping is printed under its own heading as ``"<k>, <score>"`` lines,
    with the score formatted to three decimal places. A blank line separates
    the agglomerative section from the K-means section.

    Args:
        kmean_dict: Mapping of k to the K-means score.
        agglom_dict: Mapping of k to the agglomerative score.
    """
    # The K-means heading carries the leading newline that separates sections.
    sections = (
        ('Agglomerative:', agglom_dict),
        ('\nK-means:', kmean_dict),
    )
    for heading, scores in sections:
        print(heading)
        for n_clusters, score in scores.items():
            print(f'{n_clusters}, {score:.3f}')
    


In [50]:
# Sweep k = 2..5 (min_clusters defaults to 2) with both K-means and
# agglomerative clustering on the XSDHandler feature vectors, then print the
# silhouette score for each k. silhouette() returns (kmean_dict, agglom_dict),
# which is unpacked straight into utility_print's two positional parameters.
utility_print(*silhouette(path_to_featurevec_csv='./XSDHandler.csv',max_clusters=5))

Agglomerative:
2, 0.563
3, 0.564
4, 0.409
5, 0.418

K-means:
2, 0.273
3, 0.286
4, 0.134
5, -0.079


In [51]:
# Score one existing clustering CSV against the same feature vectors
# (single-clustering mode: a scalar score is returned instead of the dicts).
# NOTE(review): './XSDHandler_kmeans_5.csv' is presumably the file written by
# the k=5 K-means run of the sweep (save=True) - verify; on a fresh checkout
# this cell would then depend on the sweep cell having been executed first.
s = silhouette(path_to_featurevec_csv='./XSDHandler.csv', clustering_csv_path='./XSDHandler_kmeans_5.csv')
print(f'{s:.3f}')

-0.079
