# 1. Multi-class and Multi-Label Classification Using Support Vector Machines

In [1]:
import random
import warnings
warnings.filterwarnings("ignore")
from sklearn import metrics
import pandas as pd
import numpy as np
from math import log10

In [2]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OneVsRestClassifier

In [3]:
from sklearn.metrics import hamming_loss
from tqdm import tqdm
from statistics import mode
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn.metrics import silhouette_score
from sklearn.metrics import jaccard_similarity_score
from sklearn.multioutput import ClassifierChain
from sklearn.multioutput import ClassifierChain
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold

### Useful Functions

In [4]:
def compute_hamming_loss(pred):
    hamming_loss = []
    for l in categories:
        hamming_loss.append(metrics.hamming_loss(test_data[l], pred[l]))
    return np.mean(hamming_loss)

### (a) Download the Anuran Calls (MFCCs) Data Set from: https://archive.ics.uci.edu/ml/datasets/Anuran+Calls+%28MFCCs%29. Choose 70% of the data randomly as the training set.

In [8]:
df = pd.read_csv('data/Frogs_MFCCs.csv')

In [9]:
train_data, test_data = train_test_split(df, train_size=0.7, shuffle=True)

### (b) Each instance has three labels: Families, Genus, and Species. Each of the labels has multiple classes. We wish to solve a multi-class and multi-label problem. One of the most important approaches to multi-class classification is to train a classifier for each label. We first try this approach:

### i. Research exact match and hamming score/ loss methods for evaluating multi-label classification and use them in evaluating the classifiers in this problem.

In [10]:
df.columns.values

array(['MFCCs_ 1', 'MFCCs_ 2', 'MFCCs_ 3', 'MFCCs_ 4', 'MFCCs_ 5',
       'MFCCs_ 6', 'MFCCs_ 7', 'MFCCs_ 8', 'MFCCs_ 9', 'MFCCs_10',
       'MFCCs_11', 'MFCCs_12', 'MFCCs_13', 'MFCCs_14', 'MFCCs_15',
       'MFCCs_16', 'MFCCs_17', 'MFCCs_18', 'MFCCs_19', 'MFCCs_20',
       'MFCCs_21', 'MFCCs_22', 'Family', 'Genus', 'Species', 'RecordID'],
      dtype=object)

In [11]:
categories = ['Family', 'Genus', 'Species']

In [12]:
x_train = train_data.iloc[:, :-4]
x_test = test_data.iloc[:, :-4]
y_train = train_data.iloc[:, -4:-1]
y_test = test_data.iloc[:, -4:-1]

In [13]:
# Using pipeline for applying logistic regression and one vs rest classifier
LogReg_pipeline = Pipeline([
                ('clf', OneVsRestClassifier(LogisticRegression(solver='sag'), n_jobs=-1)),
            ])
for category in categories:
    print('**Processing {}**'.format(category))
    
    # Training logistic regression model on train data
    LogReg_pipeline.fit(x_train, train_data[category])
    
    # calculating test accuracy
    prediction = LogReg_pipeline.predict(x_test)
    print('Hamming Loss is {}'.format(round(hamming_loss(test_data[category], prediction), 2)))
    print('Exact Match score is {}'.format(round(accuracy_score(test_data[category], prediction), 2)))
    print("\n")   

**Processing Family**
Hamming Loss is 0.06
Exact Match score is 0.94


**Processing Genus**
Hamming Loss is 0.07
Exact Match score is 0.93


**Processing Species**
Hamming Loss is 0.06
Exact Match score is 0.94




### ii. Train a SVM for each of the labels, using Gaussian kernels and one versus all classifiers. Determine the weight of the SVM penalty and the width of the Gaussian Kernel using 10 fold cross validation. You are welcome to try to solve the problem with both standardized and raw attributes and report the results.

In [5]:
def get_c_range(x_train, y_train):
    c_ = np.logspace(-5, 8, 10)
    scores = []
    for c in c_:
        svc = SVC(kernel='rbf', C=c)
        svc.fit(x_train, y_train)
        scores.append(svc.score(x_train, y_train))
    scores = np.array(scores)
    ind = np.argwhere(scores > 0.9).flatten()
    c_1 = c_[ind[0]]
    c_2 = c_[ind[-1]]
    return c_1, c_2

In [7]:
def get_gamma_range(x_train, y_train):
    gammas = np.append(np.logspace(-4, -1, 10), np.logspace(0, 2, 10))
    scores = []
    for g in gammas:
        svc = SVC(kernel='rbf', gamma=g)
        svc.fit(x_train, y_train)
        scores.append(svc.score(x_train, y_train))
    scores = np.array(scores)
    ind = np.argwhere(scores > 0.9).flatten()
    g_1 = gammas[ind[0]]
    g_2 = gammas[ind[-1]]
    return g_1, g_2

In [15]:
pred = pd.DataFrame()
for category in categories:
    print('**Processing {}**'.format(category))
    c_l, c_h = get_c_range(x_train, train_data[category])
    g_l, g_h = get_gamma_range(x_train, train_data[category])
    parameters = {'C':np.logspace(log10(c_l), log10(c_h), 10), 
              'gamma': np.logspace(log10(g_l), log10(g_h), 10)}
    
    svc = SVC(kernel='rbf', decision_function_shape='ovr')
    kf = StratifiedKFold(n_splits=10, shuffle=True)
    clf = GridSearchCV(svc, parameters, cv=kf, scoring='accuracy')
    clf.fit(x_train, train_data[category])
    print('Best CV Score {}'.format(round(clf.best_score_, 2)))
    
    # calculating test accuracy
    sv = clf.best_estimator_
    y_pred = sv.predict(x_test)
    y_true = test_data[category]
    pred[category] = y_pred
    print('Best Parameter {}'.format(clf.best_params_))
    print('Precision {}'.format(round(metrics.precision_score(y_true, y_pred, average='macro'), 2)))
    print('Recall {}'.format(round(metrics.recall_score(y_true, y_pred, average='macro'), 2)))
    print('F1 Score {}'.format(round(metrics.f1_score(y_true, y_pred, average='macro'), 2)))
    print('Exact Match score is {}'.format(round(accuracy_score(y_true, y_pred), 2)))
    print("\n\n")

**Processing Family**
Best CV Score 0.99
Best Parameter {'C': 38.04056104782507, 'gamma': 2.3462288481422626}
Precision 1.0
Recall 0.98
F1 Score 0.99
Exact Match score is 0.99



**Processing Genus**
Best CV Score 0.99
Best Parameter {'C': 38.04056104782507, 'gamma': 1.406527242105237}
Precision 0.98
Recall 0.98
F1 Score 0.98
Exact Match score is 0.99



**Processing Species**
Best CV Score 0.99
Best Parameter {'C': 5.994842503189409, 'gamma': 1.406527242105237}
Precision 0.98
Recall 0.98
F1 Score 0.98
Exact Match score is 0.99





In [16]:
print('Prediction for Test data:\n')
pred.head()

Prediction for Test data:



Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraAndre
1,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
2,Hylidae,Hypsiboas,HypsiboasCinerascens
3,Hylidae,Hypsiboas,HypsiboasCordobae
4,Dendrobatidae,Ameerega,Ameeregatrivittata


In [17]:
pred = pd.DataFrame(pred)
print("The hamming loss for Gaussian kernel SVM is", compute_hamming_loss(pred))

('The hamming loss for Gaussian kernel SVM is', 0.007565230816736143)


### iii. Repeat 1(b)ii and L1-penalized SVMs. Remember to standardize the attributes

In [6]:
def get_c_range2(x_train, y_train):
    c_ = np.logspace(-5, 8, 10)
    scores = []
    for c in c_:
        svc = LinearSVC(penalty='l1', C=c, multi_class='ovr', dual=False)
        svc.fit(x_train, y_train)
        scores.append(svc.score(x_train, y_train))
    scores = np.array(scores)
    ind = np.argwhere(scores > 0.9).flatten()
    c_1 = c_[ind[0]]
    c_2 = c_[ind[-1]]
    return c_1, c_2

In [18]:
pred2 = pd.DataFrame()
for category in categories:
    print('**Processing {}**'.format(category))
    c_l, c_h = get_c_range2(x_train, train_data[category])
    parameters = {'C':np.logspace(log10(c_l), log10(c_h), 20)}
    
    svc = LinearSVC(penalty='l1', multi_class='ovr', dual=False)
    kf = StratifiedKFold(n_splits=10, shuffle=True)
    clf = GridSearchCV(svc, parameters, cv=kf, scoring='accuracy')
    clf.fit(x_train, train_data[category])
    print('Best CV Score {}'.format(round(clf.best_score_, 2)))
    
    # calculating test accuracy
    sv = clf.best_estimator_
    y_pred = sv.predict(x_test)
    y_true = test_data[category]
    pred2[category] = y_pred
    print('Best Parameter {}'.format(clf.best_params_))
    print('Precision {}'.format(round(metrics.precision_score(y_true, y_pred, average='macro'), 2)))
    print('Recall {}'.format(round(metrics.recall_score(y_true, y_pred, average='macro'), 2)))
    print('F1 Score {}'.format(round(metrics.f1_score(y_true, y_pred, average='macro'), 2)))
    print('Exact Match score is {}'.format(round(accuracy_score(y_true, y_pred), 2)))
    print("\n\n")

**Processing Family**
Best CV Score 0.93
Best Parameter {'C': 41.11829402435833}
Precision 0.7
Recall 0.7
F1 Score 0.7
Exact Match score is 0.94



**Processing Genus**
Best CV Score 0.95
Best Parameter {'C': 335.9818286283784}
Precision 0.96
Recall 0.83
F1 Score 0.88
Exact Match score is 0.96



**Processing Species**
Best CV Score 0.95
Best Parameter {'C': 5.032159359259993}
Precision 0.94
Recall 0.89
F1 Score 0.91
Exact Match score is 0.96





In [19]:
print('Prediction for Test data:\n')
pred2.head()

Prediction for Test data:



Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Adenomera,AdenomeraAndre
1,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
2,Hylidae,Hypsiboas,HypsiboasCinerascens
3,Hylidae,Hypsiboas,HypsiboasCordobae
4,Dendrobatidae,Ameerega,Ameeregatrivittata


In [20]:
pred2 = pd.DataFrame(pred2)
print("The hamming loss for linear SVM is")
compute_hamming_loss(pred2)

The hamming loss for linear SVM is


0.04570016983171221

### iv. Repeat 1(b)iii by using SMOTE or any other method you know to remedy class imbalance. Report your conclusions about the classifiers you trained.

In [21]:
sm = SMOTE(random_state=2)

pred3 = pd.DataFrame()
for category in categories:
    print('**Processing {}**'.format(category))
    x_train_res, y_train_res = sm.fit_sample(x_train, train_data[category])
    
    c_l, c_h = get_c_range2(x_train_res, y_train_res)
    parameters = {'C':np.logspace(log10(c_l), log10(c_h), 20)}
    
    svc = LinearSVC(penalty='l1', multi_class='ovr', dual=False)
    kf = StratifiedKFold(n_splits=10, shuffle=True)
    clf = GridSearchCV(svc, parameters, cv=kf, scoring='accuracy')
    clf.fit(x_train_res, y_train_res)
    print('Best CV Score {}'.format(round(clf.best_score_, 2)))
    
    # calculating test accuracy
    sv = clf.best_estimator_
    y_pred = sv.predict(x_test)
    y_true = test_data[category]
    pred3[category] = y_pred
    print('Best Parameter {}'.format(clf.best_params_))
    print('Precision {}'.format(round(metrics.precision_score(y_true, y_pred, average='macro'), 2)))
    print('Recall {}'.format(round(metrics.recall_score(y_true, y_pred, average='macro'), 2)))
    print('F1 Score {}'.format(round(metrics.f1_score(y_true, y_pred, average='macro'), 2)))
    print('Exact Match score is {}'.format(round(accuracy_score(y_true, y_pred), 2)))
    print("\n\n")

**Processing Family**
Best CV Score 0.95
Best Parameter {'C': 14.38449888287663}
Precision 0.74
Recall 0.94
F1 Score 0.79
Exact Match score is 0.93



**Processing Genus**
Best CV Score 0.95
Best Parameter {'C': 22432.475028984205}
Precision 0.75
Recall 0.92
F1 Score 0.8
Exact Match score is 0.91



**Processing Species**
Best CV Score 0.95
Best Parameter {'C': 5529.553425383406}
Precision 0.89
Recall 0.93
F1 Score 0.91
Exact Match score is 0.96





In [22]:
print('Prediction for Test data:\n')
pred3.head()

Prediction for Test data:



Unnamed: 0,Family,Genus,Species
0,Leptodactylidae,Osteocephalus,AdenomeraAndre
1,Leptodactylidae,Adenomera,AdenomeraHylaedactylus
2,Hylidae,Hypsiboas,HypsiboasCinerascens
3,Hylidae,Hypsiboas,HypsiboasCordobae
4,Dendrobatidae,Ameerega,Ameeregatrivittata


In [23]:
pred3 = pd.DataFrame(pred3)
print("The hamming loss for linear SVM is")
compute_hamming_loss(pred3)

The hamming loss for linear SVM is


0.06530801296896711

# 2. K-Means Clustering on a Multi-Class and Multi-Label Data Set

## Monte-Carlo Simulation: Perform the following procedures 50 times, and report the average and standard deviation of the 50 Hamming Distances that you calculate.

### (a) Use k-means clustering on the whole Anuran Calls (MFCCs) Data Set (do not split the data into train and test, as we are not performing supervised learning in this exercise). Choose k automatically based on one of the methods provided in the slides (CH or Gap Statistics or scree plots or Silhouettes) or any other method you know.

### (b) In each cluster, determine which family is the majority by reading the true labels. Repeat for genus and species.

### (c) Now for each cluster you have a majority label triplet (family, genus, species). Calculate the average Hamming distance (score) between the true labels and the labels assigned by clusters.

In [37]:
hamming_losses = []
X = df.iloc[:, :-4]

for i in tqdm(range(50)):
    silh = []
    results = []
    for k in range(2, 15):
        kmeans = KMeans(n_clusters=k).fit(X)
        labels = kmeans.labels_
        results.append(labels)
        silh.append(silhouette_score(X, labels))
    index = np.argmax(silh)
    best_k = index + 2
    print("The best k is", best_k)
    best_labels = results[index]
    
    #decide majority labels for each k-means cluster
    labels_df = df[['Family', 'Genus', 'Species']].copy()
    labels_df['kmeans_label'] = best_labels
    
    majority = {}
    for l in range(best_k):
        cluster = labels_df[labels_df['kmeans_label'] == l]
        triplet = {}
        for tl in ['Family', 'Genus', 'Species']:
            triplet[tl] = cluster[tl].value_counts().idxmax()
        majority[l] = triplet
    print("Majority labels:", majority)
    
    #compute hamming loss
    misclassifed = 0
    for l in range(best_k):
        cluster = labels_df[labels_df['kmeans_label'] == l]
        for tl in ['Family', 'Genus', 'Species']:
            misclassifed += sum(cluster[tl] != majority[l][tl])
    hamming_loss = float(misclassifed) / (3 * len(df))
    hamming_losses.append(hamming_loss)


  0%|          | 0/50 [00:00<?, ?it/s][A
  2%|▏         | 1/50 [00:23<18:59, 23.25s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



  4%|▍         | 2/50 [00:47<18:48, 23.52s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



  6%|▌         | 3/50 [01:11<18:31, 23.65s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Adenomera', 'Species': 'AdenomeraAndre', 'Family': 'Leptodactylidae'}})



  8%|▊         | 4/50 [01:35<18:15, 23.82s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 10%|█         | 5/50 [01:59<17:57, 23.94s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 12%|█▏        | 6/50 [02:23<17:31, 23.91s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 14%|█▍        | 7/50 [02:47<17:11, 24.00s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 16%|█▌        | 8/50 [03:11<16:48, 24.01s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 18%|█▊        | 9/50 [03:36<16:27, 24.10s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 20%|██        | 10/50 [04:00<16:04, 24.12s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 22%|██▏       | 11/50 [04:24<15:40, 24.10s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 24%|██▍       | 12/50 [04:48<15:12, 24.01s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 26%|██▌       | 13/50 [05:12<14:50, 24.07s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 28%|██▊       | 14/50 [05:36<14:27, 24.11s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 30%|███       | 15/50 [06:00<14:03, 24.10s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 32%|███▏      | 16/50 [06:24<13:40, 24.15s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 34%|███▍      | 17/50 [06:49<13:16, 24.15s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 36%|███▌      | 18/50 [07:13<12:52, 24.15s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 38%|███▊      | 19/50 [07:37<12:27, 24.11s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 40%|████      | 20/50 [08:01<12:05, 24.19s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 42%|████▏     | 21/50 [08:25<11:40, 24.17s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 44%|████▍     | 22/50 [08:49<11:12, 24.02s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 46%|████▌     | 23/50 [09:13<10:48, 24.03s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 48%|████▊     | 24/50 [09:37<10:22, 23.94s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 50%|█████     | 25/50 [10:00<09:57, 23.89s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 52%|█████▏    | 26/50 [10:24<09:34, 23.92s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 54%|█████▍    | 27/50 [10:49<09:11, 23.99s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Adenomera', 'Species': 'AdenomeraAndre', 'Family': 'Leptodactylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 56%|█████▌    | 28/50 [11:13<08:48, 24.01s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 58%|█████▊    | 29/50 [11:37<08:24, 24.04s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 60%|██████    | 30/50 [12:01<08:00, 24.01s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 62%|██████▏   | 31/50 [12:25<07:36, 24.00s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 64%|██████▍   | 32/50 [12:49<07:12, 24.05s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 66%|██████▌   | 33/50 [13:13<06:49, 24.06s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 68%|██████▊   | 34/50 [13:37<06:25, 24.07s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 70%|███████   | 35/50 [14:01<06:00, 24.06s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 72%|███████▏  | 36/50 [14:25<05:36, 24.04s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 74%|███████▍  | 37/50 [14:49<05:11, 23.95s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 76%|███████▌  | 38/50 [15:12<04:46, 23.86s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 78%|███████▊  | 39/50 [15:37<04:22, 23.91s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 80%|████████  | 40/50 [16:00<03:58, 23.90s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 82%|████████▏ | 41/50 [16:24<03:35, 23.94s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 84%|████████▍ | 42/50 [16:48<03:11, 23.96s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 86%|████████▌ | 43/50 [17:12<02:47, 23.93s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 88%|████████▊ | 44/50 [17:36<02:23, 23.91s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 90%|█████████ | 45/50 [18:00<01:59, 23.96s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}})



 92%|█████████▏| 46/50 [18:24<01:35, 23.94s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 1: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 94%|█████████▍| 47/50 [18:48<01:12, 24.01s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}})



 96%|█████████▌| 48/50 [19:12<00:47, 23.85s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



 98%|█████████▊| 49/50 [19:36<00:23, 23.88s/it][A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 2: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})



100%|██████████| 50/50 [19:59<00:00, 23.81s/it][A
[A

('The best k is', 4)
('Majority labels:', {0: {'Genus': 'Adenomera', 'Species': 'AdenomeraHylaedactylus', 'Family': 'Leptodactylidae'}, 1: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCordobae', 'Family': 'Hylidae'}, 2: {'Genus': 'Ameerega', 'Species': 'Ameeregatrivittata', 'Family': 'Dendrobatidae'}, 3: {'Genus': 'Hypsiboas', 'Species': 'HypsiboasCinerascens', 'Family': 'Hylidae'}})


In [38]:
means = np.mean(hamming_losses)
stds = np.std(hamming_losses)
print("The average of hamming loss is", means)
print("The standard deviation of hamming loss is", stds) 

('The average of hamming loss is', 0.22422052351169797)
('The standard deviation of hamming loss is', 0.008730845482499541)


## ISLR Solution in 'islr_solutions.pdf' in the same directory.