In [10]:
from tensorflow import keras
import numpy as np
import pyaudio
import time
import librosa
import os
from glob import glob
import scipy.cluster.hierarchy as hcluster
import matplotlib.pyplot as plt

In [11]:
# Restore the previously trained Keras model saved at 'Trained Model'.
model = keras.models.load_model('Trained Model')

# Remove the model's last layer in place so that model.predict() returns the
# output of the layer before it; later cells use that output as the
# embedding. NOTE(review): pop() is a Sequential-model method — presumably the
# saved model is Sequential; confirm.
model.pop()

In [12]:
# Label table: index 0 is the "No Speaker" placeholder, followed by the
# numeric speaker ids "1" .. str(numClips) as strings.
numClips = 23

speakers = ["No Speaker"]
speakers.extend(str(speakerId) for speakerId in range(1, numClips + 1))

In [13]:
def getFeatures(audio_Paths, includeExtraFeatures=False):
    """Build one flat feature vector per one-second audio clip.

    Parameters
    ----------
    audio_Paths : iterable of str
        Paths of the audio files to process.
    includeExtraFeatures : bool, optional
        ``False`` (default) returns the flattened MFCC matrix only. This is
        what the function has always produced: the earlier implementation
        called ``np.append`` for the zero-crossing rate and the spectral
        roll-off but discarded the returned arrays (``np.append`` never
        modifies its argument), so those features were computed and thrown
        away. ``True`` really concatenates them after the MFCC values.
        NOTE(review): enabling this lengthens every vector, so a model fitted
        on MFCC-only vectors will presumably need retraining — confirm.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per clip whose reported duration is exactly 1.0 second,
        in input order. Clips of any other duration are skipped.
    """
    data_X = []

    for path in audio_Paths:
        # Skip every clip that is not exactly one second long.
        if librosa.get_duration(filename=path) != 1.0:
            continue

        y, sr = librosa.load(path)

        featureParts = [librosa.feature.mfcc(y=y, sr=sr).flatten()]

        if includeExtraFeatures:
            featureParts.append(librosa.feature.zero_crossing_rate(y=y).flatten())
            featureParts.append(librosa.feature.spectral_rolloff(y=y, sr=sr).flatten())

        # Joins the parts into a single new 1-D vector.
        data_X.append(np.concatenate(featureParts))

    return data_X

In [14]:
def findAllAudioFilePaths(speaker):
    """Return the paths of all ``*.wav`` files under a speaker's dataset folder.

    The folder ``Dataset/Youtube Speech Dataset/Dataset/<speaker>`` is walked
    recursively and each visited directory is globbed for ``*.wav``. A folder
    that does not exist yields an empty list.
    """
    speakerRoot = "Dataset/Youtube Speech Dataset/Dataset/{}".format(speaker)

    wavPaths = []
    for directory, _subdirs, _files in os.walk(speakerRoot):
        wavPaths.extend(glob(os.path.join(directory, '*.wav')))
    return wavPaths

In [15]:
def getdata(file):
    """Run the model on every usable clip of one speaker folder.

    Parameters
    ----------
    file : str
        Name of the speaker folder, passed to ``findAllAudioFilePaths``.

    Returns
    -------
    The result of ``model.predict`` on the stacked feature vectors returned
    by ``getFeatures`` (one row per usable clip).

    Raises
    ------
    FileNotFoundError
        If no audio file is found for ``file``.
    ValueError
        If audio files exist but ``getFeatures`` returns no feature vector
        for any of them.
    """
    audio_Paths = findAllAudioFilePaths(file)
    if not audio_Paths:
        raise FileNotFoundError("No .wav files found for speaker folder '{}'".format(file))

    audioFeatureArray = getFeatures(audio_Paths)
    if len(audioFeatureArray) == 0:
        # Fail here with a clear message instead of handing the model an
        # empty batch.
        raise ValueError("No usable audio clips for speaker folder '{}'".format(file))

    validation_x = np.array(audioFeatureArray)

    return model.predict(validation_x)

In [19]:
# Number of speaker folders to embed; folders are addressed as "1", "2", ...
numSpeaker = 1

# Collect the per-speaker outputs first and stack them once along axis 0,
# rather than special-casing the first speaker and growing the array with
# repeated np.append calls.
speakerEmbeddings = [getdata(str(speakerIndex + 1)) for speakerIndex in range(numSpeaker)]
predictScore = np.concatenate(speakerEmbeddings, axis=0)

print("***------")
print(predictScore.shape)

# Preview at most the first ten rows; slicing avoids an IndexError when fewer
# than ten rows are available.
for embedding in predictScore[:10]:
    print(embedding)


***------
(124, 16)
[ 0.        0.        0.        0.        0.       78.55986   0.
  0.        0.        0.        0.        0.        0.        0.
  0.       28.033657]
[ 0.        0.        0.        0.        0.       53.745163  0.
  0.       41.077858  0.        0.        0.        0.        0.
  0.       33.880196]
[ 0.        0.        0.        0.        0.       28.087034  0.
  0.       20.831884  0.        0.        0.        0.        0.
  0.       22.473051]
[ 0.        0.        0.        0.        0.       45.467968  0.
  0.       18.405972  0.        0.        0.        0.        0.
  0.       29.711096]
[ 0.        0.        0.        0.        0.       57.781567  0.
  0.       44.595768  0.        0.       13.87579   0.        0.
  0.       39.74966 ]
[ 0.        0.        0.        0.        0.       53.216015  0.
  0.       23.865799  0.        0.        0.        0.        0.
  0.       22.290665]
[ 0.        0.        0.        0.        0.       57.49935   0.
  0

In [17]:
# Stays 0 when no candidate threshold yields the expected number of clusters.
correctThreshold = 0

# The linkage tree does not depend on the cut-off distance, so build it once
# and only re-cut it inside the loop (fclusterdata would recompute the
# pairwise distances and the linkage for every candidate).
linkageTree = hcluster.linkage(predictScore, method='centroid', metric='euclidean')

# `thresh` is kept as the loop variable on purpose: it remains defined after
# the loop, holding the last threshold that was tried.
for thresh in range(60, 100):
    clusters = hcluster.fcluster(linkageTree, thresh, criterion="distance")
    numClusters = len(set(clusters))

    if numClusters == numSpeaker:
        correctThreshold = thresh
        break

    print("threshold: %f, number of clusters: %d" % (thresh, numClusters))
else:
    # Report a failed search explicitly instead of silently leaving 0 behind.
    print("No threshold in [60, 100) produced %d cluster(s)" % numSpeaker)
  

threshold: 60.000000, number of clusters: 4
threshold: 61.000000, number of clusters: 4
threshold: 62.000000, number of clusters: 4
threshold: 63.000000, number of clusters: 4
threshold: 64.000000, number of clusters: 4
threshold: 65.000000, number of clusters: 3
threshold: 66.000000, number of clusters: 3
threshold: 67.000000, number of clusters: 3
threshold: 68.000000, number of clusters: 3
threshold: 69.000000, number of clusters: 3
threshold: 70.000000, number of clusters: 2
threshold: 71.000000, number of clusters: 2
threshold: 72.000000, number of clusters: 2
threshold: 73.000000, number of clusters: 2
threshold: 74.000000, number of clusters: 2
threshold: 75.000000, number of clusters: 2
threshold: 76.000000, number of clusters: 2
threshold: 77.000000, number of clusters: 2
threshold: 78.000000, number of clusters: 2
threshold: 79.000000, number of clusters: 2
threshold: 80.000000, number of clusters: 2
threshold: 81.000000, number of clusters: 2
threshold: 82.000000, number of 

In [18]:
print("Threshold: {}".format(correctThreshold))

# Cluster with the threshold that was just reported. The loop variable
# `thresh` equals correctThreshold only when the search stopped on a match;
# otherwise it holds the last candidate tried while correctThreshold is 0.
clusters = hcluster.fclusterdata(predictScore, correctThreshold, criterion="distance", metric='euclidean', method='centroid')
print(clusters)

Threshold: 86
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1]
