## 1. Multi-class and Multi-Label Classification Using Support Vector Machines

Import packages

In [102]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, hamming_loss
from sklearn.svm import SVC, LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as imbpipeline
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score

### (a) Download the Anuran Calls (MFCCs) Data Set

In [104]:
df = pd.read_csv('../data/Anuran Calls (MFCCs)/Frogs_MFCCs.csv')
df

Unnamed: 0,MFCCs_ 1,MFCCs_ 2,MFCCs_ 3,MFCCs_ 4,MFCCs_ 5,MFCCs_ 6,MFCCs_ 7,MFCCs_ 8,MFCCs_ 9,MFCCs_10,...,MFCCs_17,MFCCs_18,MFCCs_19,MFCCs_20,MFCCs_21,MFCCs_22,Family,Genus,Species,RecordID
0,1.0,0.152936,-0.105586,0.200722,0.317201,0.260764,0.100945,-0.150063,-0.171128,0.124676,...,-0.108351,-0.077623,-0.009568,0.057684,0.118680,0.014038,Leptodactylidae,Adenomera,AdenomeraAndre,1
1,1.0,0.171534,-0.098975,0.268425,0.338672,0.268353,0.060835,-0.222475,-0.207693,0.170883,...,-0.090974,-0.056510,-0.035303,0.020140,0.082263,0.029056,Leptodactylidae,Adenomera,AdenomeraAndre,1
2,1.0,0.152317,-0.082973,0.287128,0.276014,0.189867,0.008714,-0.242234,-0.219153,0.232538,...,-0.050691,-0.023590,-0.066722,-0.025083,0.099108,0.077162,Leptodactylidae,Adenomera,AdenomeraAndre,1
3,1.0,0.224392,0.118985,0.329432,0.372088,0.361005,0.015501,-0.194347,-0.098181,0.270375,...,-0.136009,-0.177037,-0.130498,-0.054766,-0.018691,0.023954,Leptodactylidae,Adenomera,AdenomeraAndre,1
4,1.0,0.087817,-0.068345,0.306967,0.330923,0.249144,0.006884,-0.265423,-0.172700,0.266434,...,-0.048885,-0.053074,-0.088550,-0.031346,0.108610,0.079244,Leptodactylidae,Adenomera,AdenomeraAndre,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7190,1.0,-0.554504,-0.337717,0.035533,0.034511,0.443451,0.093889,-0.100753,0.037087,0.081075,...,0.069430,0.071001,0.021591,0.052449,-0.021860,-0.079860,Hylidae,Scinax,ScinaxRuber,60
7191,1.0,-0.517273,-0.370574,0.030673,0.068097,0.402890,0.096628,-0.116460,0.063727,0.089034,...,0.061127,0.068978,0.017745,0.046461,-0.015418,-0.101892,Hylidae,Scinax,ScinaxRuber,60
7192,1.0,-0.582557,-0.343237,0.029468,0.064179,0.385596,0.114905,-0.103317,0.070370,0.081317,...,0.082474,0.077771,-0.009688,0.027834,-0.000531,-0.080425,Hylidae,Scinax,ScinaxRuber,60
7193,1.0,-0.519497,-0.307553,-0.004922,0.072865,0.377131,0.086866,-0.115799,0.056979,0.089316,...,0.051796,0.069073,0.017963,0.041803,-0.027911,-0.096895,Hylidae,Scinax,ScinaxRuber,60


In [105]:
train_df, test_df = train_test_split(df,shuffle=True, test_size=0.3, random_state=42)
print(train_df.shape)
print(test_df.shape)

(5036, 26)
(2159, 26)


In [106]:
X_train = train_df.iloc[:, :-4].values
X_test = test_df.iloc[:, :-4].values

In [107]:
y_train_family = train_df['Family'].values
y_train_genus = train_df['Genus'].values
y_train_species = train_df['Species'].values

y_test_family = test_df['Family'].values
y_test_genus = test_df['Genus'].values
y_test_species = test_df['Species'].values

### (b) Train a classifier for each label

#### (i) Research

Exact Match Ratio: Percentage of samples where all labels are correctly predicted

Hamming Loss: Fraction of labels that are incorrectly predicted

In [111]:
def exact_match(y_true, y_pred):
    return accuracy_score(y_true, y_pred)

def hamming_score(y_true, y_pred):
    return 1 - hamming_loss(y_true, y_pred)

#### (ii) Train a SVM for each of the labels

In [113]:
#using both very large and very small parameters to train SVM (C)
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

param_grid = {
    'C': np.logspace(-3, 6, 15), #very large and very small parameters
    'gamma': np.linspace(0.1, 2, 8),
    'kernel': ['rbf']
}

def evaluate_model_svm(X_train, X_test, y_train, y_test, param_grid):
    svm = GridSearchCV(SVC(decision_function_shape='ovr'), 
                      param_grid, 
                      cv=10, 
                      n_jobs=-1,
                      scoring='accuracy')
    svm.fit(X_train, y_train)
    
    y_pred = svm.predict(X_test)
    
    print(f"best_parameters: {svm.best_params_}")
    print(f"Exact match ratio: {exact_match(y_test, y_pred)}")
    print(f"Hamming score: {hamming_score(y_test, y_pred)}")
    print(f"Hamming loss: {hamming_loss(y_test, y_pred)}")
    print()
    

In [114]:
print("Family Results:")
evaluate_model_svm(X_train_std, X_test_std, y_train_family, y_test_family, param_grid)
print("Genus Results:")
evaluate_model_svm(X_train_std, X_test_std, y_train_genus, y_test_genus, param_grid)
print("Species Results:")
evaluate_model_svm(X_train_std, X_test_std, y_train_species, y_test_species, param_grid)

Family Results:
best_parameters: {'C': 7.196856730011529, 'gamma': 0.1, 'kernel': 'rbf'}
Exact match ratio: 0.9925891616489115
Hamming score: 0.9925891616489115
Hamming loss: 0.007410838351088467

Genus Results:
best_parameters: {'C': 1.6378937069540647, 'gamma': 0.1, 'kernel': 'rbf'}
Exact match ratio: 0.9870310328855951
Hamming score: 0.9870310328855951
Hamming loss: 0.012968967114404817

Species Results:
best_parameters: {'C': 7.196856730011529, 'gamma': 0.1, 'kernel': 'rbf'}
Exact match ratio: 0.984251968503937
Hamming score: 0.984251968503937
Hamming loss: 0.015748031496062992



#### (iii) Repeat 1(b)ii with L1-penalized SVMs

In [116]:
param_grid_l1 = {
    'C': np.logspace(-3, 6, 20),
    'penalty': ['l1'],
    'dual': [False],
    'max_iter': [100000]
}

def evaluate_model_svm_l1(X_train, X_test, y_train, y_test, param_grid):
    svm = GridSearchCV(LinearSVC(), 
                      param_grid, 
                      cv=10, 
                      n_jobs=-1,
                      scoring='accuracy')
    svm.fit(X_train, y_train)
    
    y_pred = svm.predict(X_test)
    
    print(f"best_parameters: {svm.best_params_}")
    print(f"Exact match ratio: {exact_match(y_test, y_pred)}")
    print(f"Hamming score: {hamming_score(y_test, y_pred)}")
    print(f"Hamming loss: {hamming_loss(y_test, y_pred)}")
    print()


In [117]:
print("Family Results (L1):")
evaluate_model_svm_l1(X_train_std, X_test_std, y_train_family, y_test_family, param_grid_l1)
print("Genus Results (L1):")
evaluate_model_svm_l1(X_train_std, X_test_std, y_train_genus, y_test_genus, param_grid_l1)
print("Species Results (L1):")
evaluate_model_svm_l1(X_train_std, X_test_std, y_train_species, y_test_species, param_grid_l1)

Family Results (L1):
best_parameters: {'C': 0.6951927961775606, 'dual': False, 'max_iter': 100000, 'penalty': 'l1'}
Exact match ratio: 0.9282075034738305
Hamming score: 0.9282075034738305
Hamming loss: 0.07179249652616952

Genus Results (L1):
best_parameters: {'C': 18.32980710832434, 'dual': False, 'max_iter': 100000, 'penalty': 'l1'}
Exact match ratio: 0.9416396479851783
Hamming score: 0.9416396479851783
Hamming loss: 0.058360352014821676

Species Results (L1):
best_parameters: {'C': 2.069138081114788, 'dual': False, 'max_iter': 100000, 'penalty': 'l1'}
Exact match ratio: 0.9583140342751274
Hamming score: 0.9583140342751274
Hamming loss: 0.041685965724872626



#### (iv) Repeat 1(b)iii by using SMOTE or any other method for imbalance

In [119]:
param_grid_smote = {
    'svm__penalty': ['l1'],
    'svm__dual': [False],
    'svm__C': np.logspace(-3, 6, 20),
    'svm__multi_class': ['ovr'],
    'svm__max_iter': [10000]
}

def evaluate_model_svm_smote(X_train, X_test, y_train, y_test, param_grid):
    pipeline = imbpipeline(steps = [['smote', SMOTE(random_state=42)],
                                ['svm', LinearSVC()]])
    
    svm = GridSearchCV(pipeline, 
                      param_grid,
                      cv=10, 
                      n_jobs=-1,
                      scoring='accuracy')
    svm.fit(X_train, y_train)
    
    y_pred = svm.predict(X_test)
    
    print(f"best_parameters: {svm.best_params_}")
    print(f"Exact match ratio: {exact_match(y_test, y_pred)}")
    print(f"Hamming score: {hamming_score(y_test, y_pred)}")
    print(f"Hamming loss: {hamming_loss(y_test, y_pred)}")
    print()
    

In [120]:
print("Family Results (SMOTE):")
evaluate_model_svm_smote(X_train_std, X_test_std, y_train_family, y_test_family, param_grid_smote)
print("Genus Results (SMOTE):")
evaluate_model_svm_smote(X_train_std, X_test_std, y_train_genus, y_test_genus, param_grid_smote)
print("Species Results (SMOTE):")
evaluate_model_svm_smote(X_train_std, X_test_std, y_train_species, y_test_species, param_grid_smote)

Family Results (SMOTE):
best_parameters: {'svm__C': 6.158482110660261, 'svm__dual': False, 'svm__max_iter': 10000, 'svm__multi_class': 'ovr', 'svm__penalty': 'l1'}
Exact match ratio: 0.9092172301991662
Hamming score: 0.9092172301991663
Hamming loss: 0.09078276980083372

Genus Results (SMOTE):
best_parameters: {'svm__C': 54.555947811685144, 'svm__dual': False, 'svm__max_iter': 10000, 'svm__multi_class': 'ovr', 'svm__penalty': 'l1'}
Exact match ratio: 0.9018063918480778
Hamming score: 0.9018063918480779
Hamming loss: 0.09819360815192218

Species Results (SMOTE):
best_parameters: {'svm__C': 0.6951927961775606, 'svm__dual': False, 'svm__max_iter': 10000, 'svm__multi_class': 'ovr', 'svm__penalty': 'l1'}
Exact match ratio: 0.9564613246873552
Hamming score: 0.9564613246873552
Hamming loss: 0.04353867531264474



For class "Family":  
For class "Genus":
For class "Species":

## 2. K-Means Clustering on a Multi-Class and Multi-Label Data Set

### (a) (b) (c)

In [131]:
X = df.iloc[:, :-4]
y = df.iloc[:, -4:-1]
hamming_distance, hamming_loss = [], []
for i in range(50):
    print("Iteration: ", i+1)
    
    #(a) KMeans and best k
    silhouette_scores = []
    ch_scores = []
    k_range = range(2,51)
    for n in range(2, 51):
        model = KMeans(n_clusters = n, random_state = 42)
        cluster_labels = model.fit_predict(X)
        silhouette_scores.append(silhouette_score(X, cluster_labels))
        #ch_scores.append(calinski_harabasz_score(X, cluster_labels))
    best_k = k_range[np.argmax(silhouette_scores)]
    print(f"Optimal k based on silhouette score: {best_k}")

    #(b) majority family
    best_model = KMeans(n_clusters = best_k, random_state = 42)
    cluster_labels = model.fit_predict(X)
    cluster_major = pd.DataFrame(columns = y.columns)
    for c in range(best_k):
        pos, = np.where(cluster_labels == c)
        cluster_samples = y.iloc[pos, :]
        row = []
        for label in y.columns:
            curr = cluster_samples.loc[:, label].value_counts().index[0]
            row.append(curr)
        cluster_major.loc[c] = row
    display(cluster_major)

    #(c)Hamming distance, hamming score, hamming loss
    labels_misclassified = 0
    for x in range(best_k):
        pos, = np.where(cluster_labels == c)
        for label in y.loc[pos].values:
            missed = (label != cluster_major.loc[x].values)
            labels_misclassified += np.sum(missed)
    curr_hamming_dist = labels_misclassified / y.shape[0]
    curr_hamming_loss = labels_misclassified / (y.shape[0] * y.shape[1])
    hamming_distance.append(curr_hamming_dist)
    hamming_loss.append(curr_hamming_loss)
    print(f"Hamming Distance: {curr_hamming_dist}")
    print(f"Hamming Loss: {curr_hamming_loss}")
    print()
    

Iteration:  1
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  2
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  3
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  4
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  5
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  6
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  7
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  8
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  9
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  10
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  11
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  12
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  13
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  14
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  15
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  16
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  17
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  18
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  19
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  20
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  21
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  22
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  23
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  24
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  25
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  26
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  27
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  28
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  29
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  30
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  31
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  32
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  33
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  34
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  35
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  36
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  37
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  38
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  39
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  40
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  41
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  42
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  43
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  44
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  45
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  46
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  47
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  48
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  49
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457

Iteration:  50
Optimal k based on silhouette score: 4


Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
1,Hylidae,Hypsiboas,HypsiboasCinerascens
2,Leptodactylidae,Adenomera,AdenomeraAndre
3,Bufonidae,Rhinella,Rhinellagranulosa


Hamming Distance: 0.1467685892981237
Hamming Loss: 0.04892286309937457



In [137]:
print("Average Hamming Distance :", np.mean(hamming_distance))
print("Average Hamming Loss :", np.mean(hamming_loss))
print("Average Hamming Score ", (1 - np.mean(hamming_loss)))

Average Hamming Distance : 0.1467685892981237
Average Hamming Loss : 0.048922863099374575
Average Hamming Score  0.9510771369006255


References: <br>
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html <br>
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.hamming_loss.html <br>
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html