In [1]:
import os
import sklearn
import librosa
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal, sparse

audio_dir = '../GTZAN/genres_original/'
corrupted_file = ['jazz.00054.wav']

In [2]:
from sklearn.preprocessing import StandardScaler

# pre processing
genres = os.listdir(audio_dir)

num_train = 90

# spectrogram
window = 'hamming'
nperseg = 4410
s_n_fft = 1024
s_hop_length = 512

spec_train_data = []
spec_test_data = []

spec_train_label = []
spec_test_label = []
spec_vote_length = []
spec_extended_test_label = []

# mfcc
n_mfcc = 15

mfcc_train_data = []
mfcc_test_data = []

mfcc_train_label = []
mfcc_test_label = []
mfcc_vote_length = []
mfcc_extended_test_label = []

# mel-spectrogram
ms_n_fft = 1024
ms_hop_length = 512

mspec_train_data = []
mspec_test_data = []

mspec_train_label = []
mspec_test_label = []
mspec_vote_length = []
mspec_extended_test_label = []


for genre in genres:
    print("Processing genre: " + genre)
    files = os.listdir(audio_dir + genre)
    
    # Split training and testing
    mask = num_train * [True] + (100 - num_train) * [False]
    np.random.shuffle(mask)

    for i, file in enumerate(files):        
        if file in corrupted_file:
            continue
        file_name = audio_dir + genre + '/' + file
        sound, sample_rate = librosa.load(file_name)
        
        # spectrogram
        spec = librosa.stft(y=sound, window=window, n_fft=s_n_fft, hop_length=s_hop_length)        
        spec_sample = np.log(np.abs(spec) + 1e-7)

        # mfcc 
        mfcc_sample = librosa.feature.mfcc(y=sound, sr=sample_rate, n_mfcc=n_mfcc)
        
        # mel-spectrogram
        mspec = librosa.feature.melspectrogram(y=sound, sr=sample_rate, window=window, n_fft=ms_n_fft, hop_length=ms_hop_length)        
        mspec_sample = np.log(np.abs(mspec) + 1e-7)
        
        if mask[i]:
            spec_train_data.append(spec_sample)
            mfcc_train_data.append(mfcc_sample)
            mspec_train_data.append(mspec_sample)
            
            spec_train_label.extend([genres.index(genre)] * spec_sample.shape[1])
            mfcc_train_label.extend([genres.index(genre)] * mfcc_sample.shape[1])
            mspec_train_label.extend([genres.index(genre)] * mspec_sample.shape[1])

        else:
            spec_test_data.append(spec_sample)
            mfcc_test_data.append(mfcc_sample)
            mspec_test_data.append(mspec_sample)
            
            spec_test_label.extend([genres.index(genre)])
            mfcc_test_label.extend([genres.index(genre)])
            mspec_test_label.extend([genres.index(genre)])
            
            spec_vote_length.append(spec_sample.shape[1])
            mfcc_vote_length.append(mfcc_sample.shape[1])
            mspec_vote_length.append(mspec_sample.shape[1])

            spec_extended_test_label.extend([genres.index(genre)] * spec_sample.shape[1])
            mfcc_extended_test_label.extend([genres.index(genre)] * mfcc_sample.shape[1])
            mspec_extended_test_label.extend([genres.index(genre)] * mspec_sample.shape[1])
            
# numpyify array
spec_train_data = np.hstack(spec_train_data)
mfcc_train_data = np.hstack(mfcc_train_data)
mspec_train_data = np.hstack(mspec_train_data)

spec_test_data = np.hstack(spec_test_data)
mfcc_test_data = np.hstack(mfcc_test_data)
mspec_test_data = np.hstack(mspec_test_data)

spec_train_label = np.array(spec_train_label)
mfcc_train_label = np.array(mfcc_train_label)
mspec_train_label = np.array(mspec_train_label)

spec_test_label = np.array(spec_test_label)
mfcc_test_label = np.array(mfcc_test_label)
mspec_test_label = np.array(mspec_test_label)

spec_vote_length = np.array(spec_vote_length)
mfcc_vote_length = np.array(mfcc_vote_length)
mspec_vote_length = np.array(mspec_vote_length)

spec_extended_test_label = np.array(spec_extended_test_label)
mfcc_extended_test_label = np.array(mfcc_extended_test_label)
mspec_extended_test_label = np.array(mspec_extended_test_label)


# 0 mean

# spectrogram
spec_scaler = StandardScaler()
spec_scaler.fit(spec_train_data.T)
spec_train_data = spec_scaler.transform(spec_train_data.T)
spec_test_data = spec_scaler.transform(spec_test_data.T)

# mfcc
mfcc_scaler = StandardScaler()
mfcc_scaler.fit(mfcc_train_data.T)
mfcc_train_data = mfcc_scaler.transform(mfcc_train_data.T)
mfcc_test_data = mfcc_scaler.transform(mfcc_test_data.T)

# mel-spectrogram
mspec_scaler = StandardScaler()
mspec_scaler.fit(mspec_train_data.T)
mspec_train_data = mspec_scaler.transform(mspec_train_data.T)
mspec_test_data = mspec_scaler.transform(mspec_test_data.T)

print('Done with preprocessing')

Processing genre: blues
Processing genre: classical
Processing genre: country
Processing genre: disco
Processing genre: hiphop
Processing genre: jazz
Processing genre: metal
Processing genre: pop
Processing genre: reggae
Processing genre: rock
Done with preprocessing


In [3]:
# PCA
from sklearn.decomposition import PCA
n_components = 15

# spectrogram
spec_pca = PCA(n_components=n_components)
spec_pca.fit(spec_train_data)
spec_X_train = spec_pca.transform(spec_train_data)
spec_X_test = spec_pca.transform(spec_test_data)

# mfcc 
mfcc_pca = PCA(n_components=n_components)
mfcc_pca.fit(mfcc_train_data)
mfcc_X_train = mfcc_pca.transform(mfcc_train_data)
mfcc_X_test = mfcc_pca.transform(mfcc_test_data)

# melspectrogram
mspec_pca = PCA(n_components=n_components)
mspec_pca.fit(mspec_train_data)
mspec_X_train = mspec_pca.transform(mspec_train_data)
mspec_X_test = mspec_pca.transform(mspec_test_data)

In [4]:
for i, genre in enumerate(genres):
    print (str(i) + ' ' + genre)

0 blues
1 classical
2 country
3 disco
4 hiphop
5 jazz
6 metal
7 pop
8 reggae
9 rock


In [7]:
# KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from collections import Counter

np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
k = 10

In [9]:
print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(spec_X_train, spec_train_label) 
print('Predicting...')
y_predict = knn.predict(spec_X_test)


cur_idx = 0
confidence = []
voted_label = []
for i in range(len(spec_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+spec_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / spec_vote_length[i])
    cur_idx = cur_idx + spec_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(np.sum(voted_label == spec_test_label) / spec_test_label.shape[0])

print('non-voting accuracy: ')
print(classification_report(spec_extended_test_label, y_predict))

Training...
Predicting...
confidence: 
[0.393 0.323 0.516 0.748 0.725 0.651 0.618 0.583 0.306 0.338 0.783 0.926
 0.372 0.252 0.968 0.906 0.838 0.677 0.981 0.621 0.320 0.450 0.245 0.285
 0.204 0.301 0.271 0.253 0.247 0.351 0.288 0.348 0.508 0.236 0.457 0.311
 0.383 0.267 0.319 0.391 0.343 0.239 0.465 0.556 0.327 0.260 0.266 0.445
 0.429 0.299 0.461 0.741 0.806 0.382 0.923 0.920 0.266 0.488 0.870 0.439
 0.951 0.940 0.974 0.886 0.725 1.000 1.000 0.681 0.825 0.565 0.660 0.619
 0.855 0.637 0.589 0.471 0.725 0.264 0.456 0.606 0.188 0.186 0.236 0.403
 0.358 0.298 0.304 0.243 0.360 0.350 0.338 0.390 0.268 0.725 0.241 0.196
 0.544 0.377 0.226 0.327]
voting accuracy: 
0.68
non-voting accuracy: 
              precision    recall  f1-score   support

           0       0.52      0.48      0.50     12930
           1       0.76      0.73      0.75     12936
           2       0.32      0.27      0.29     12933
           3       0.31      0.26      0.29     12936
           4       0.32      0.25  

In [10]:
print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(mfcc_X_train, mfcc_train_label) 
print('Predicting...')
y_predict = knn.predict(mfcc_X_test)


cur_idx = 0
confidence = []
voted_label = []
for i in range(len(mfcc_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+mfcc_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / mfcc_vote_length[i])
    cur_idx = cur_idx + mfcc_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(np.sum(voted_label == mfcc_test_label) / mfcc_test_label.shape[0])

print('non-voting accuracy: ')
print(classification_report(mfcc_extended_test_label, y_predict))

Training...
Predicting...
confidence: 
[0.297 0.367 0.837 0.838 0.781 0.668 0.811 0.438 0.341 0.343 0.888 0.847
 0.645 0.429 0.947 0.909 0.898 0.866 0.962 0.616 0.379 0.393 0.268 0.231
 0.228 0.256 0.271 0.284 0.267 0.448 0.324 0.346 0.386 0.195 0.486 0.358
 0.447 0.330 0.428 0.464 0.271 0.216 0.371 0.577 0.333 0.296 0.275 0.483
 0.442 0.346 0.456 0.658 0.727 0.581 0.995 0.997 0.247 0.732 0.995 0.439
 0.941 0.885 0.962 0.801 0.539 1.000 1.000 0.392 0.762 0.531 0.715 0.606
 0.961 0.729 0.601 0.510 0.749 0.292 0.644 0.680 0.212 0.288 0.300 0.486
 0.567 0.408 0.418 0.244 0.425 0.394 0.229 0.317 0.259 0.539 0.237 0.205
 0.439 0.227 0.393 0.212]
voting accuracy: 
0.74
non-voting accuracy: 
              precision    recall  f1-score   support

           0       0.60      0.55      0.57     12930
           1       0.75      0.80      0.77     12936
           2       0.29      0.27      0.28     12933
           3       0.33      0.31      0.32     12936
           4       0.37      0.25  

In [11]:
print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(mspec_X_train, mspec_train_label) 
print('Predicting...')
y_predict = knn.predict(mspec_X_test)


cur_idx = 0
confidence = []
voted_label = []
for i in range(len(mspec_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+mspec_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / mspec_vote_length[i])
    cur_idx = cur_idx + mspec_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(np.sum(voted_label == mspec_test_label) / mspec_test_label.shape[0])

print('non-voting accuracy: ')
print(classification_report(mspec_extended_test_label, y_predict))

Training...
Predicting...
confidence: 
[0.254 0.248 0.405 0.790 0.795 0.708 0.617 0.466 0.290 0.305 0.760 0.898
 0.475 0.222 0.940 0.872 0.778 0.732 0.940 0.545 0.363 0.505 0.329 0.331
 0.233 0.252 0.342 0.271 0.224 0.387 0.313 0.327 0.517 0.243 0.404 0.340
 0.343 0.278 0.315 0.366 0.288 0.200 0.396 0.425 0.340 0.258 0.252 0.437
 0.483 0.333 0.644 0.716 0.755 0.524 0.968 0.961 0.206 0.765 0.854 0.467
 0.951 0.916 0.968 0.812 0.661 1.000 1.000 0.575 0.816 0.493 0.615 0.511
 0.898 0.603 0.530 0.435 0.675 0.346 0.459 0.514 0.173 0.265 0.249 0.367
 0.460 0.352 0.318 0.300 0.412 0.235 0.282 0.495 0.304 0.661 0.301 0.202
 0.440 0.323 0.215 0.281]
voting accuracy: 
0.64
non-voting accuracy: 
              precision    recall  f1-score   support

           0       0.51      0.46      0.48     12930
           1       0.76      0.72      0.74     12936
           2       0.30      0.27      0.28     12933
           3       0.29      0.26      0.28     12936
           4       0.35      0.26  

In [17]:
# SVM
from sklearn import svm

In [13]:
print('Training...')
svm_model = svm.SVC(gamma='scale', verbose=True)
svm_model.fit(spec_X_train, spec_train_label)

print('Predicting...')
y_predict = svm_model.predict(spec_X_test)

cur_idx = 0
confidence = []
voted_label = []
for i in range(len(spec_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+spec_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / spec_vote_length[i])
    cur_idx = cur_idx + spec_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(np.sum(voted_label == spec_test_label) / spec_test_label.shape[0])

print("non-voting accuracy:")
print(classification_report(spec_extended_test_label, y_predict))

Training...
[LibSVM]Predicting...
confidence: 
[0.394 0.455 0.529 0.812 0.807 0.772 0.737 0.618 0.396 0.425 0.847 0.956
 0.558 0.415 0.986 0.954 0.911 0.654 0.999 0.720 0.538 0.520 0.284 0.394
 0.228 0.412 0.344 0.336 0.283 0.520 0.407 0.576 0.715 0.387 0.641 0.476
 0.466 0.377 0.394 0.501 0.492 0.266 0.515 0.726 0.474 0.469 0.261 0.366
 0.516 0.365 0.514 0.797 0.828 0.383 0.824 0.641 0.261 0.461 0.879 0.372
 0.971 0.963 0.990 0.910 0.657 0.850 0.811 0.747 0.954 0.572 0.840 0.883
 0.873 0.843 0.796 0.548 0.830 0.415 0.780 0.831 0.229 0.263 0.316 0.582
 0.422 0.377 0.585 0.344 0.597 0.436 0.329 0.570 0.431 0.657 0.288 0.307
 0.637 0.596 0.321 0.288]
voting accuracy: 
0.68
non-voting accuracy:
              precision    recall  f1-score   support

           0       0.56      0.55      0.55     12930
           1       0.77      0.78      0.77     12936
           2       0.34      0.33      0.33     12933
           3       0.41      0.34      0.37     12936
           4       0.41     

In [18]:
print('Training...')
svm_model = svm.SVC(gamma='scale', verbose=True)
svm_model.fit(mfcc_X_train, mfcc_train_label)

print('Predicting...')
y_predict = svm_model.predict(mfcc_X_test)

cur_idx = 0
confidence = []
voted_label = []
for i in range(len(mfcc_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+mfcc_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / mfcc_vote_length[i])
    cur_idx = cur_idx + mfcc_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(np.sum(voted_label == mfcc_test_label) / mfcc_test_label.shape[0])

print('non-voting accuracy: ')
print(classification_report(mfcc_extended_test_label, y_predict))

Training...
[LibSVM]Predicting...
confidence: 
[0.531 0.537 0.918 0.910 0.867 0.771 0.835 0.507 0.513 0.524 0.903 0.951
 0.602 0.473 0.975 0.875 0.856 0.804 0.993 0.826 0.569 0.531 0.363 0.359
 0.253 0.305 0.432 0.370 0.403 0.606 0.434 0.570 0.585 0.276 0.500 0.439
 0.507 0.409 0.615 0.534 0.418 0.383 0.491 0.679 0.376 0.416 0.381 0.547
 0.510 0.394 0.519 0.704 0.821 0.476 0.930 0.951 0.231 0.540 0.916 0.502
 0.951 0.918 0.985 0.819 0.660 0.810 0.780 0.416 0.892 0.523 0.851 0.829
 0.852 0.876 0.729 0.589 0.831 0.421 0.875 0.783 0.236 0.353 0.407 0.715
 0.631 0.505 0.599 0.318 0.479 0.435 0.340 0.548 0.306 0.660 0.316 0.257
 0.412 0.339 0.241 0.193]
voting accuracy: 
0.7
non-voting accuracy: 
              precision    recall  f1-score   support

           0       0.61      0.65      0.63     12930
           1       0.81      0.83      0.82     12936
           2       0.34      0.34      0.34     12933
           3       0.40      0.37      0.38     12936
           4       0.41     

In [19]:
print('Training...')
svm_model = svm.SVC(gamma='scale', verbose=True)
svm_model.fit(mspec_X_train, mspec_train_label)

print('Predicting...')
y_predict = svm_model.predict(mspec_X_test)

cur_idx = 0
confidence = []
voted_label = []
for i in range(len(mspec_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+mspec_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / mspec_vote_length[i])
    cur_idx = cur_idx + mspec_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(np.sum(voted_label == mspec_test_label) / mspec_test_label.shape[0])

print('non-voting accuracy: ')
print(classification_report(mspec_extended_test_label, y_predict))

Training...
[LibSVM]Predicting...
confidence: 
[0.311 0.393 0.578 0.872 0.896 0.866 0.768 0.510 0.374 0.387 0.807 0.937
 0.538 0.234 0.976 0.953 0.836 0.758 0.971 0.587 0.620 0.627 0.271 0.490
 0.322 0.346 0.467 0.324 0.274 0.470 0.453 0.557 0.720 0.379 0.612 0.441
 0.452 0.329 0.351 0.473 0.384 0.232 0.485 0.672 0.464 0.448 0.282 0.357
 0.480 0.346 0.701 0.695 0.828 0.410 0.634 0.562 0.226 0.647 0.690 0.374
 0.976 0.955 0.972 0.867 0.695 0.937 0.754 0.616 0.910 0.520 0.826 0.810
 0.819 0.829 0.740 0.625 0.809 0.376 0.809 0.753 0.202 0.262 0.428 0.533
 0.526 0.424 0.438 0.490 0.594 0.450 0.292 0.717 0.339 0.695 0.328 0.261
 0.533 0.533 0.281 0.303]
voting accuracy: 
0.69
non-voting accuracy: 
              precision    recall  f1-score   support

           0       0.55      0.55      0.55     12930
           1       0.74      0.76      0.75     12936
           2       0.34      0.36      0.35     12933
           3       0.38      0.31      0.34     12936
           4       0.44    

In [None]:
# semi-supervised KNN

k = 10

# Spectrogram

# separate labeled and unlabeled train data
labeled_percentage = 0.1
num_true = int(spec_X_train.shape[0] * labeled_percentage)
num_false = spec_X_train.shape[0] - num_true
labeled_mask = np.array(num_true * [True] + num_false * [False])
np.random.shuffle(labeled_mask)

spec_labeled_train_data = spec_X_train[labeled_mask,:]
spec_labeled_train_label = spec_train_label[labeled_mask]

spec_unlabeled_train_data = spec_X_train[~labeled_mask,:]

In [44]:
# Labeling unlabeled train data

print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(spec_labeled_train_data, spec_labeled_train_label) 
print('Predicting...')
y_predict = knn.predict(spec_unlabeled_train_data)
print('Done')

Training...
Predicting...


In [59]:
spec_semi_train_data = np.concatenate((spec_labeled_train_data, spec_unlabeled_train_data))
spec_semi_train_label = np.concatenate((spec_labeled_train_label, y_predict))

print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(spec_semi_train_data, spec_semi_train_label) 
print('Predicting...')
y_predict = knn.predict(spec_X_test)


cur_idx = 0
confidence = []
voted_label = []
for i in range(len(spec_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+spec_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / spec_vote_length[i])
    cur_idx = cur_idx + spec_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(classification_report(voted_label, spec_test_label))

print('non-voting accuracy: ')
print(classification_report(spec_extended_test_label, y_predict))

Training...
Predicting...
confidence: 
[0.380 0.254 0.421 0.709 0.663 0.599 0.607 0.789 0.391 0.451 0.798 0.947
 0.363 0.243 0.981 0.922 0.862 0.703 0.998 0.576 0.353 0.445 0.335 0.330
 0.256 0.255 0.275 0.369 0.369 0.248 0.316 0.308 0.598 0.248 0.555 0.316
 0.445 0.247 0.404 0.346 0.367 0.358 0.498 0.629 0.286 0.273 0.284 0.353
 0.444 0.248 0.430 0.735 0.829 0.432 0.783 0.758 0.235 0.334 0.804 0.424
 0.981 0.964 0.988 0.927 0.885 0.924 0.845 0.794 0.951 0.728 0.675 0.677
 0.760 0.705 0.609 0.558 0.753 0.341 0.538 0.592 0.252 0.250 0.381 0.305
 0.255 0.229 0.315 0.220 0.373 0.333 0.414 0.466 0.364 0.885 0.244 0.231
 0.647 0.372 0.338 0.415]
voting accuracy: 
              precision    recall  f1-score   support

           0       0.70      0.78      0.74         9
           1       0.90      0.90      0.90        10
           2       0.30      1.00      0.46         3
           3       0.20      0.67      0.31         3
           4       0.40      0.67      0.50         6
        

In [61]:
# semi-supervised KNN

# MFCC

# separate labeled and unlabeled train data
labeled_percentage = 0.1
num_true = int(mfcc_X_train.shape[0] * labeled_percentage)
num_false = mfcc_X_train.shape[0] - num_true
labeled_mask = np.array(num_true * [True] + num_false * [False])
np.random.shuffle(labeled_mask)

mfcc_labeled_train_data = mfcc_X_train[labeled_mask,:]
mfcc_labeled_train_label = mfcc_train_label[labeled_mask]

mfcc_unlabeled_train_data = mfcc_X_train[~labeled_mask,:]

In [62]:
# Labeling unlabeled train data

print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(mfcc_labeled_train_data, mfcc_labeled_train_label) 
print('Predicting...')
y_predict = knn.predict(mfcc_unlabeled_train_data)
print('Done')

Training...
Predicting...
Done


In [63]:
mfcc_semi_train_data = np.concatenate((mfcc_labeled_train_data, mfcc_unlabeled_train_data))
mfcc_semi_train_label = np.concatenate((mfcc_labeled_train_label, y_predict))

print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(mfcc_semi_train_data, mfcc_semi_train_label) 
print('Predicting...')
y_predict = knn.predict(mfcc_X_test)


cur_idx = 0
confidence = []
voted_label = []
for i in range(len(mfcc_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+mfcc_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / mfcc_vote_length[i])
    cur_idx = cur_idx + mfcc_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(classification_report(voted_label, mfcc_test_label))

print('non-voting accuracy: ')
print(classification_report(mfcc_extended_test_label, y_predict))

Training...
Predicting...
confidence: 
[0.251 0.304 0.763 0.824 0.813 0.649 0.812 0.528 0.482 0.278 0.899 0.909
 0.798 0.491 0.981 0.954 0.960 0.928 0.978 0.582 0.312 0.348 0.305 0.243
 0.216 0.309 0.261 0.348 0.325 0.427 0.428 0.341 0.397 0.233 0.394 0.476
 0.512 0.351 0.382 0.390 0.265 0.291 0.376 0.663 0.314 0.381 0.233 0.384
 0.462 0.333 0.537 0.694 0.769 0.633 0.917 0.944 0.356 0.573 0.848 0.548
 0.979 0.942 0.981 0.865 0.613 0.833 0.787 0.495 0.886 0.591 0.774 0.698
 0.853 0.813 0.676 0.592 0.782 0.333 0.686 0.723 0.162 0.171 0.286 0.405
 0.506 0.347 0.489 0.248 0.479 0.430 0.256 0.464 0.316 0.613 0.209 0.273
 0.532 0.325 0.563 0.277]
voting accuracy: 
              precision    recall  f1-score   support

           0       0.70      1.00      0.82         7
           1       1.00      0.91      0.95        11
           2       0.20      0.50      0.29         4
           3       0.40      0.80      0.53         5
           4       0.30      0.75      0.43         4
        

In [64]:
# semi-supervised KNN

# Mel-spectrogram

# separate labeled and unlabeled train data
labeled_percentage = 0.1
num_true = int(mspec_X_train.shape[0] * labeled_percentage)
num_false = mspec_X_train.shape[0] - num_true
labeled_mask = np.array(num_true * [True] + num_false * [False])
np.random.shuffle(labeled_mask)

mspec_labeled_train_data = mspec_X_train[labeled_mask,:]
mspec_labeled_train_label = mspec_train_label[labeled_mask]

mspec_unlabeled_train_data = mspec_X_train[~labeled_mask,:]

In [66]:
# Labeling unlabeled train data

print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(mspec_labeled_train_data, mspec_labeled_train_label) 
print('Predicting...')
y_predict = knn.predict(mspec_unlabeled_train_data)
print('Done')

Training...
Predicting...
Done


In [67]:
mspec_semi_train_data = np.concatenate((mspec_labeled_train_data, mspec_unlabeled_train_data))
mspec_semi_train_label = np.concatenate((mspec_labeled_train_label, y_predict))

print('Training...')
knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn.fit(mspec_semi_train_data, mspec_semi_train_label) 
print('Predicting...')
y_predict = knn.predict(mspec_X_test)


cur_idx = 0
confidence = []
voted_label = []
for i in range(len(mspec_test_label)):
    c = Counter(y_predict[cur_idx : cur_idx+mspec_vote_length[i]])
    voted_label.append(c.most_common(1)[0][0])
    confidence.append(c.most_common(1)[0][1] / mspec_vote_length[i])
    cur_idx = cur_idx + mspec_vote_length[i]

print('confidence: ')
print(np.array(confidence))

print('voting accuracy: ')
print(classification_report(voted_label, mspec_test_label))

print('non-voting accuracy: ')
print(classification_report(mspec_extended_test_label, y_predict))

Training...
Predicting...
confidence: 
[0.254 0.210 0.426 0.780 0.776 0.693 0.625 0.679 0.560 0.370 0.797 0.929
 0.568 0.227 0.987 0.881 0.830 0.763 0.986 0.537 0.455 0.561 0.350 0.338
 0.200 0.278 0.309 0.381 0.322 0.316 0.363 0.362 0.572 0.295 0.474 0.418
 0.408 0.282 0.400 0.381 0.296 0.292 0.422 0.525 0.358 0.251 0.211 0.421
 0.526 0.336 0.673 0.753 0.809 0.504 0.671 0.656 0.219 0.530 0.649 0.432
 0.979 0.957 0.987 0.898 0.836 0.954 0.797 0.718 0.942 0.651 0.584 0.555
 0.735 0.626 0.481 0.531 0.677 0.404 0.465 0.476 0.189 0.186 0.245 0.290
 0.361 0.288 0.324 0.343 0.459 0.214 0.397 0.514 0.362 0.836 0.341 0.224
 0.462 0.362 0.270 0.340]
voting accuracy: 
              precision    recall  f1-score   support

           0       0.50      0.71      0.59         7
           1       0.90      1.00      0.95         9
           2       0.40      0.50      0.44         8
           3       0.20      1.00      0.33         2
           4       0.40      0.80      0.53         5
        