## Silhouette score for various clustering algorithms

In [1]:
import pandas as pd
from sklearn.metrics import silhouette_score

## The K-Means algorithm

In [2]:
from sklearn.cluster import KMeans

In [4]:
# Load the spreadsheet and discard its 'Unnamed: 0' column in one chained
# expression; the last line displays the resulting frame.
df = pd.read_excel('socio-eco-reduced.xlsx').drop(columns='Unnamed: 0')
df

Unnamed: 0,Normalised Age (Yrs),Gender (Male),Helmet (Full-mask),Education: Not Graduated,Education: Graduated,Income 0-40k,Income >=40k,Experience (<5yr),Experience (5-20yr),Experience (>20yr)
0,0.175439,1,1,1,0,1,0,0,1,0
1,0.421053,1,0,0,1,0,1,0,1,0
2,0.210526,1,1,0,1,0,1,0,1,0
3,0.491228,0,0,1,0,1,0,1,0,0
4,0.035088,1,1,1,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...
476,0.210526,1,1,0,1,0,1,0,1,0
477,0.105263,1,1,0,1,0,1,1,0,0
478,0.736842,1,1,0,1,0,1,0,0,1
479,0.245614,1,1,0,1,0,1,0,1,0


In [28]:
# Sweep the number of clusters from 2 to 10 using K-Means with k-means++
# initialisation, recording the silhouette score of each fitted partition
# keyed by cluster count.
scores = {}

for i in range(2, 11):
    kmeans_1 = KMeans(n_clusters=i, init='k-means++', n_init=500, algorithm='lloyd', random_state=42)
    kmeans_1.fit(df)
    scores[i] = silhouette_score(df, kmeans_1.labels_)

# Build the summary table in a single constructor call. The dict preserves
# insertion order, so rows come out in ascending cluster count.
silhouette_scores = pd.DataFrame({
    'Clusters': list(scores.keys()),
    'Scores': list(scores.values()),
})

silhouette_scores

Unnamed: 0,Clusters,Scores
0,2,0.400194
1,3,0.417856
2,4,0.469786
3,5,0.516879
4,6,0.55864
5,7,0.592534
6,8,0.632464
7,9,0.672118
8,10,0.7007


In [30]:
# Same sweep as the k-means++ cell (2 to 10 clusters) but with purely random
# centroid initialisation, so the two tables can be compared directly.
scores = {}

for i in range(2, 11):
    kmeans_1 = KMeans(n_clusters=i, init='random', n_init=500, algorithm='lloyd', random_state=42)
    kmeans_1.fit(df)
    scores[i] = silhouette_score(df, kmeans_1.labels_)

# Build the summary table in a single constructor call. The dict preserves
# insertion order, so rows come out in ascending cluster count.
silhouette_scores = pd.DataFrame({
    'Clusters': list(scores.keys()),
    'Scores': list(scores.values()),
})

silhouette_scores

Unnamed: 0,Clusters,Scores
0,2,0.400194
1,3,0.417856
2,4,0.469786
3,5,0.516879
4,6,0.559264
5,7,0.595318
6,8,0.632464
7,9,0.672118
8,10,0.696649


## The K-Medoids algorithm

In [31]:
from sklearn_extra.cluster import KMedoids

In [57]:
# Sweep the number of clusters from 2 to 10 using K-Medoids (PAM, heuristic
# initialisation) with the euclidean metric, recording the silhouette score
# of each fitted partition keyed by cluster count.
scores = {}

for i in range(2, 11):
    kmedoids = KMedoids(n_clusters=i, metric='euclidean', method='pam', init='heuristic', max_iter=1000, random_state=42)
    kmedoids.fit(df)
    scores[i] = silhouette_score(df, kmedoids.labels_)

# Build the summary table in a single constructor call. The dict preserves
# insertion order, so rows come out in ascending cluster count.
silhouette_scores = pd.DataFrame({
    'Clusters': list(scores.keys()),
    'Scores': list(scores.values()),
})

silhouette_scores

Unnamed: 0,Clusters,Scores
0,2,0.374456
1,3,0.417038
2,4,0.450195
3,5,0.484335
4,6,0.539894
5,7,0.592991
6,8,0.620044
7,9,0.657539
8,10,0.691939


In [58]:
# Sweep the number of clusters from 2 to 10 using K-Medoids (PAM, heuristic
# initialisation) with the manhattan metric, recording the silhouette score
# of each fitted partition keyed by cluster count.
scores = {}

for i in range(2, 11):
    kmedoids = KMedoids(n_clusters=i, metric='manhattan', method='pam', init='heuristic', max_iter=1000, random_state=42)
    kmedoids.fit(df)
    # NOTE(review): silhouette_score is called without a `metric` argument, so
    # it falls back to its own default instead of the 'manhattan' distance used
    # for clustering. Confirm this is intentional (e.g. to keep scores
    # comparable across cells); otherwise pass metric='manhattan' here.
    scores[i] = silhouette_score(df, kmedoids.labels_)

# Build the summary table in a single constructor call. The dict preserves
# insertion order, so rows come out in ascending cluster count.
silhouette_scores = pd.DataFrame({
    'Clusters': list(scores.keys()),
    'Scores': list(scores.values()),
})

silhouette_scores

Unnamed: 0,Clusters,Scores
0,2,0.368871
1,3,0.398345
2,4,0.444793
3,5,0.480578
4,6,0.531971
5,7,0.589887
6,8,0.617498
7,9,0.658396
8,10,0.692941


In [63]:
# Sweep the number of clusters from 2 to 10 using K-Medoids (PAM) with the
# cosine metric, recording the silhouette score of each fitted partition
# keyed by cluster count.
# NOTE(review): this cell also switches init to 'k-medoids++' (the euclidean
# and manhattan cells use 'heuristic'), so two settings differ at once —
# confirm that is intended.
scores = {}

for i in range(2, 11):
    kmedoids = KMedoids(n_clusters=i, metric='cosine', method='pam', init='k-medoids++', max_iter=1000, random_state=42)
    kmedoids.fit(df)
    # NOTE(review): silhouette_score is called without a `metric` argument, so
    # it falls back to its own default instead of the 'cosine' distance used
    # for clustering. Confirm this is intentional (e.g. to keep scores
    # comparable across cells); otherwise pass metric='cosine' here.
    scores[i] = silhouette_score(df, kmedoids.labels_)

# Build the summary table in a single constructor call. The dict preserves
# insertion order, so rows come out in ascending cluster count.
silhouette_scores = pd.DataFrame({
    'Clusters': list(scores.keys()),
    'Scores': list(scores.values()),
})

silhouette_scores

Unnamed: 0,Clusters,Scores
0,2,0.376165
1,3,0.402517
2,4,0.446166
3,5,0.480446
4,6,0.534435
5,7,0.590744
6,8,0.617013
7,9,0.64731
8,10,0.692857
