In [25]:
from tensorflow import keras
import numpy as np
import pyaudio
import time
import librosa
import os
from glob import glob
import scipy.cluster.hierarchy as hcluster
import matplotlib.pyplot as plt

In [26]:
# Restore the previously trained Keras model from its saved location.
MODEL_DIR = 'Trained Model'
model = keras.models.load_model(MODEL_DIR)

# Remove the model's last layer so that predict() returns the output of the
# layer before it, which the rest of the notebook treats as the embedding.
# NOTE(review): assumes the saved model is a Sequential model (pop() support).
model.pop()

In [27]:
# Label table: index 0 is the "no speaker" class, followed by the clip
# identifiers "1" .. str(numClips).
numClips = 23

speakers = ["No Speaker"]
speakers.extend(str(clipNumber) for clipNumber in range(1, numClips + 1))

In [28]:
def getFeatures(audio_Paths, includeExtraFeatures=False):
    """Extract one flat feature vector for every one-second audio clip.

    Parameters
    ----------
    audio_Paths : iterable of str
        Paths of the audio files to process.
    includeExtraFeatures : bool, optional
        When True, the zero-crossing-rate and spectral-rolloff values are
        concatenated after the MFCCs. The default (False) returns MFCCs only,
        which is what this function has always produced: the earlier
        implementation called ``np.append`` without keeping its return value,
        so the extra features were computed and then discarded.
        NOTE(review): presumably the saved model was trained on the MFCC-only
        vectors -- confirm before enabling this flag for ``model.predict``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D feature array per accepted clip, in input order. Files whose
        duration reported by librosa is not exactly 1.0 second are skipped.
    """
    data_X = []

    for path in audio_Paths:

        # Only clips of exactly one second are used, so every vector has the
        # same length; anything else is silently skipped.
        if librosa.get_duration(filename=path) != 1.0:
            continue

        y, sr = librosa.load(path)

        # MFCCs always form the leading part of the feature vector.
        featureParts = [librosa.feature.mfcc(y=y, sr=sr).flatten()]

        if includeExtraFeatures:
            featureParts.append(librosa.feature.zero_crossing_rate(y=y).flatten())
            featureParts.append(librosa.feature.spectral_rolloff(y=y, sr=sr).flatten())

        # np.concatenate returns the joined array (np.append does too, but its
        # result must be assigned -- it never modifies its input in place).
        data_X.append(np.concatenate(featureParts))

    return data_X

In [29]:
def findAllAudioFilePaths(speaker):
    """Return the paths of all ``*.wav`` files stored for one speaker.

    The speaker's folder under ``Dataset/Youtube Speech Dataset/Dataset`` is
    walked recursively, and the ``.wav`` files of every directory visited are
    collected in walk order. A missing folder yields an empty list.
    """
    speakerRoot = "Dataset/Youtube Speech Dataset/Dataset/{}".format(speaker)

    wavPaths = []
    for dirPath, _subDirs, _fileNames in os.walk(speakerRoot):
        wavPaths.extend(glob(os.path.join(dirPath, '*.wav')))

    return wavPaths

In [30]:
def getdata(file):
    """Compute the model's embedding output for every usable clip of a speaker.

    Parameters
    ----------
    file : str
        Name of the speaker folder, as passed to ``findAllAudioFilePaths``.

    Returns
    -------
    The value returned by ``model.predict`` for the stacked feature vectors
    (one row per clip accepted by ``getFeatures``).

    Raises
    ------
    ValueError
        If the speaker folder holds no ``.wav`` files, or if none of them is
        accepted by ``getFeatures``. Previously an empty folder surfaced as an
        ``IndexError`` from an unused local, and an empty feature list was
        passed straight to ``model.predict``.
    """
    audio_Paths = findAllAudioFilePaths(file)
    if not audio_Paths:
        raise ValueError("No .wav files found for speaker '{}'".format(file))

    audioFeatureArray = getFeatures(audio_Paths)
    if not audioFeatureArray:
        raise ValueError("No usable (1-second) clips found for speaker '{}'".format(file))

    # Stack the per-clip vectors into a 2-D batch for the model.
    validation_x = np.array(audioFeatureArray)

    embeddingScore = model.predict(validation_x)

    return embeddingScore

In [31]:
# Number of speakers whose clips are embedded; speaker folders are named
# "1" .. str(numSpeaker).
numSpeaker = 4

# Collect each speaker's embeddings first and join them once at the end.
# This avoids special-casing the first speaker and the repeated full-array
# copy that np.append performs on every call.
perSpeakerScores = [getdata(str(speakerId)) for speakerId in range(1, numSpeaker + 1)]

# Rows are stacked in speaker order: all clips of speaker 1, then 2, ...
predictScore = np.concatenate(perSpeakerScores, axis=0)

print("***------")
print(predictScore.shape)

***------
(621, 16)


In [32]:
# Search for the smallest integer distance threshold in [60, 100) that splits
# the embeddings into exactly numSpeaker clusters. correctThreshold stays 0
# when no such threshold exists.
correctThreshold = 0

# The linkage does not depend on the threshold, so build it once instead of
# letting fclusterdata recompute distances and linkage on every iteration.
linkageMatrix = hcluster.linkage(predictScore, method='centroid', metric='euclidean')

# `thresh` is deliberately the loop variable: it remains bound after the loop
# (to the matching threshold, or to 99 if the search fails).
for thresh in range(60, 100):
    clusters = hcluster.fcluster(linkageMatrix, thresh, criterion="distance")
    numClusters = len(set(clusters))

    if numClusters == numSpeaker:
        correctThreshold = thresh
        break

    print("threshold: %f, number of clusters: %d" % (thresh, numClusters))
else:
    # Loop finished without a break: make the failure visible instead of
    # silently leaving correctThreshold at 0.
    print("WARNING: no threshold in [60, 100) produced %d clusters" % numSpeaker)
  

threshold: 60.000000, number of clusters: 8
threshold: 61.000000, number of clusters: 8
threshold: 62.000000, number of clusters: 7
threshold: 63.000000, number of clusters: 7
threshold: 64.000000, number of clusters: 6
threshold: 65.000000, number of clusters: 6
threshold: 66.000000, number of clusters: 6
threshold: 67.000000, number of clusters: 6
threshold: 68.000000, number of clusters: 6
threshold: 69.000000, number of clusters: 6
threshold: 70.000000, number of clusters: 6
threshold: 71.000000, number of clusters: 6
threshold: 72.000000, number of clusters: 5
threshold: 73.000000, number of clusters: 5


In [33]:
# Report the threshold chosen by the search and cluster with that same value.
# The clustering previously used the leaked loop variable `thresh`, which only
# equals correctThreshold when the search succeeded; after a failed search the
# printed threshold and the one actually applied would disagree.
print("Threshold: {}".format(correctThreshold))

clusters = hcluster.fclusterdata(predictScore, correctThreshold, criterion="distance", metric='euclidean', method='centroid')
print(clusters)

Threshold: 74
[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 