In [73]:
import os
import sys
import time
from glob import glob

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pyaudio
import scipy.cluster.hierarchy as hcluster
from tensorflow import keras

In [74]:
# Restore the saved model from the 'Trained Model' location on disk.
trainedModelPath = 'Trained Model'
model = keras.models.load_model(trainedModelPath)

# Drop the model's last layer so that predict() returns the output of the
# layer before it, which the rest of the notebook uses as the embedding.
model.pop()

In [75]:
# Label list: index 0 is the "No Speaker" placeholder, followed by the
# string labels "1" .. str(numClips).
numClips = 15

speakers = ["No Speaker"]
speakers.extend(str(label) for label in range(1, numClips + 1))

In [76]:
def getFeatures(audio_Paths):
    """Build one flattened MFCC feature vector per one-second audio clip.

    Parameters
    ----------
    audio_Paths : iterable of str
        Paths of the audio files to process.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array (the flattened MFCC matrix) per accepted clip, in input
        order. Clips whose reported duration is not exactly 1.0 second are
        skipped, so the list can be shorter than ``audio_Paths`` or empty.

    Notes
    -----
    Only MFCCs are part of the feature vector. Zero-crossing rate and
    spectral roll-off are deliberately not computed here: passing them to
    ``np.append`` without using its return value never adds them to the
    vector, because ``np.append`` returns a new array instead of modifying
    its argument. Concatenating them for real would change the vector length.
    NOTE(review): the saved model presumably expects the MFCC-only length, so
    extending the features would likely require retraining -- confirm against
    the training notebook.
    """
    data_X = []

    for path in audio_Paths:
        # Strict equality on a float duration: anything that is not reported
        # as exactly 1.0 s is dropped (presumably so all vectors share one
        # length -- TODO confirm a tolerance is not wanted here).
        if librosa.get_duration(filename=path) != 1.0:
            continue

        # librosa.load is called with its defaults, as before.
        y, sr = librosa.load(path)

        # MFCC matrix flattened into a single 1-D feature vector.
        mfccArray = librosa.feature.mfcc(y=y, sr=sr)
        data_X.append(mfccArray.flatten())

    return data_X

In [77]:
def findAllAudioFilePaths(speaker):
    """Collect every ``*.wav`` file below one speaker's dataset folder.

    Parameters
    ----------
    speaker : str
        Folder name substituted into
        ``Dataset/Youtube Speech Dataset/Dataset/{speaker}``.

    Returns
    -------
    list of str
        Paths of the matching files found in the speaker folder and in all
        of its sub-directories, in ``os.walk`` order. Empty when the folder
        does not exist or holds no ``.wav`` files.
    """
    speakerRoot = "Dataset/Youtube Speech Dataset/Dataset/{}".format(speaker)

    wavPaths = []
    for dirPath, _dirNames, _fileNames in os.walk(speakerRoot):
        # Glob each visited directory separately so nested folders are included.
        wavPaths.extend(glob(os.path.join(dirPath, '*.wav')))
    return wavPaths

In [78]:
def getdata(file):
    """Return the model output for every usable clip of one speaker folder.

    Parameters
    ----------
    file : str
        Speaker folder name, passed on to ``findAllAudioFilePaths``.

    Returns
    -------
    numpy.ndarray
        Result of ``model.predict`` on the stacked feature vectors, one row
        per clip that ``getFeatures`` kept.

    Raises
    ------
    ValueError
        If the folder contains no ``.wav`` files, or if none of them yields
        a feature vector (``getFeatures`` skips clips it does not accept).
    """
    audio_Paths = findAllAudioFilePaths(file)
    if not audio_Paths:
        raise ValueError(
            "No .wav files found for speaker folder {!r}".format(file)
        )

    audioFeatureArray = getFeatures(audio_Paths)
    if not audioFeatureArray:
        # Fail with a clear message instead of handing an empty batch to the model.
        raise ValueError(
            "None of the {} .wav files for speaker folder {!r} produced features".format(
                len(audio_Paths), file
            )
        )

    # Stack the per-clip vectors into one 2-D batch for the model.
    validation_x = np.array(audioFeatureArray)

    return model.predict(validation_x)

In [79]:
# Speakers to embed: numSpeaker consecutive zero-based indices starting at
# firstSpeakerIndex. Dataset folders are named one-based, hence the +1 below
# (indices 4..7 -> folders "5".."8").
numSpeaker = 4
firstSpeakerIndex = 4

# Collect one embedding matrix per speaker and join them once at the end.
# Starting from an empty list on every run keeps this cell independent of
# whatever predictScore may already hold in the kernel.
predictScor = []

for speaker in range(firstSpeakerIndex, firstSpeakerIndex + numSpeaker):
    predictScor.append(getdata(str(speaker + 1)))

# Rows stay grouped by speaker, in the order the speakers were processed.
predictScore = np.concatenate(predictScor, axis=0)

print("***------")
print(predictScore.shape)




***------
(2086, 16)


In [80]:
# Search for the smallest integer distance threshold in [25, 100) at which
# the hierarchical clustering yields at most numSpeaker clusters.
# correctThreshold stays at 0 when no threshold in the range qualifies.
correctThreshold = 0

# The linkage tree depends only on the data, not on the threshold, so build
# it once and cut it at each candidate threshold. This is the same
# computation fclusterdata performs internally on every call
# (pairwise distances -> linkage -> fcluster).
linkageMatrix = hcluster.linkage(predictScore, method='centroid', metric='euclidean')

for i in range(25, 100):
    thresh = i
    clusters = hcluster.fcluster(linkageMatrix, thresh, criterion="distance")

    if(len(set(clusters)) <= numSpeaker):
        correctThreshold = i
        break

    # Only thresholds that still give too many clusters are reported.
    print("threshold: %f, number of clusters: %d" % (thresh, len(set(clusters))))
  

threshold: 25.000000, number of clusters: 116
threshold: 26.000000, number of clusters: 108
threshold: 27.000000, number of clusters: 102
threshold: 28.000000, number of clusters: 97
threshold: 29.000000, number of clusters: 89
threshold: 30.000000, number of clusters: 81
threshold: 31.000000, number of clusters: 77
threshold: 32.000000, number of clusters: 67
threshold: 33.000000, number of clusters: 63
threshold: 34.000000, number of clusters: 56
threshold: 35.000000, number of clusters: 54
threshold: 36.000000, number of clusters: 52
threshold: 37.000000, number of clusters: 49
threshold: 38.000000, number of clusters: 44
threshold: 39.000000, number of clusters: 42
threshold: 40.000000, number of clusters: 38
threshold: 41.000000, number of clusters: 37
threshold: 42.000000, number of clusters: 36
threshold: 43.000000, number of clusters: 34
threshold: 44.000000, number of clusters: 31
threshold: 45.000000, number of clusters: 30
threshold: 46.000000, number of clusters: 26
thresho

In [87]:
# Pick the single threshold that is both reported and used for clustering.
# correctThreshold is 0 when the search found no qualifying value; in that
# case fall back to the last threshold the search tried and say so, instead
# of printing one value and clustering with another.
finalThreshold = correctThreshold if correctThreshold > 0 else thresh
if correctThreshold <= 0:
    print("Warning: no searched threshold gave <= {} clusters; using last tried value".format(numSpeaker))

print("Threshold: {}".format(finalThreshold))

clusters = hcluster.fclusterdata(predictScore, finalThreshold, criterion="distance", metric='euclidean', method='centroid')

# Disable NumPy's summarisation so the printed slice is shown in full.
# This changes the print options globally for the rest of the session.
np.set_printoptions(threshold=sys.maxsize)

# Cluster labels of the first 1000 embeddings.
print(clusters[0:1000])

Threshold: 91
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 2 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 2 3 3 3 3 3 2 3 3 3 3 3 3
 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 