In [45]:
import numpy as np
import matplotlib.pyplot as plt
from python_speech_features import mfcc
from scipy.io import wavfile
from hmmlearn import hmm
import numpy as np
import os
import warnings
from pathlib import Path
import scipy.stats as sp

In [46]:
warnings.filterwarnings("ignore")


fpaths = []
labels = []
spoken = []
features = []

for f in os.listdir('audio'):
    for w in os.listdir('audio/' + f):
        print('audio/' + f + '/' + w)
        fpaths.append('audio/' + f + '/' + w)
        labels.append(f)
        if f not in spoken:
            spoken.append(f)
print('Words spoken:', spoken)


audio/apple/apple01.wav
audio/apple/apple02.wav
audio/apple/apple03.wav
audio/apple/apple04.wav
audio/apple/apple05.wav
audio/apple/apple06.wav
audio/apple/apple07.wav
audio/apple/apple08.wav
audio/apple/apple09.wav
audio/apple/apple10.wav
audio/apple/apple11.wav
audio/apple/apple12.wav
audio/apple/apple13.wav
audio/apple/apple14.wav
audio/apple/apple15.wav
audio/banana/banana01.wav
audio/banana/banana02.wav
audio/banana/banana03.wav
audio/banana/banana04.wav
audio/banana/banana05.wav
audio/banana/banana06.wav
audio/banana/banana07.wav
audio/banana/banana08.wav
audio/banana/banana09.wav
audio/banana/banana10.wav
audio/banana/banana11.wav
audio/banana/banana12.wav
audio/banana/banana13.wav
audio/banana/banana14.wav
audio/banana/banana15.wav
audio/kiwi/kiwi01.wav
audio/kiwi/kiwi02.wav
audio/kiwi/kiwi03.wav
audio/kiwi/kiwi04.wav
audio/kiwi/kiwi05.wav
audio/kiwi/kiwi06.wav
audio/kiwi/kiwi07.wav
audio/kiwi/kiwi08.wav
audio/kiwi/kiwi09.wav
audio/kiwi/kiwi10.wav
audio/kiwi/kiwi11.wav
audio/ki

In [47]:
for n, file in enumerate(fpaths):
    samplerate, d = wavfile.read(file)
    features.append(mfcc(d, samplerate=samplerate, numcep= 6))

c = list(zip(features, labels))
np.random.shuffle(c)
features,labels = zip(*c)

print(len(features))
print(len(labels))

105
105


In [48]:
features[1].shape

(67, 6)

In [49]:
m_trainingsetfeatures = features[0:84]
m_trainingsetlabels = labels[0:84]

print(len(m_trainingsetfeatures))
print(len(m_trainingsetlabels))

84
84


In [50]:
m_testingsetfeatures = features[84:105]
m_testingsetlabels = labels[84:105]


print(len(m_testingsetfeatures))
print(len(m_testingsetlabels))

21
21


In [51]:
m_testingsetlabels

('lime',
 'lime',
 'lime',
 'peach',
 'banana',
 'kiwi',
 'apple',
 'lime',
 'pineapple',
 'apple',
 'kiwi',
 'pineapple',
 'orange',
 'peach',
 'pineapple',
 'pineapple',
 'pineapple',
 'apple',
 'pineapple',
 'banana',
 'pineapple')

In [52]:
m_testingsetfeatures[3].shape

(61, 6)

In [53]:
m_testingsetfeatures[0][0]

array([ 14.55850866,  -5.11305441, -25.02023582,   3.84555879,
       -13.45192789,  14.64663819])

In [54]:
gmmhmmindexdict = {}
index = 0
for word in spoken:
    gmmhmmindexdict[word] = index
    index = index +1


print ('Loading of data completed')

Loading of data completed


In [55]:
gmmhmmindexdict

{'apple': 0,
 'banana': 1,
 'kiwi': 2,
 'lime': 3,
 'orange': 4,
 'peach': 5,
 'pineapple': 6}

In [56]:
#Parameters needed to train GMMHMM
m_num_of_HMMStates = 3  # number of states
m_num_of_mixtures = 2  # number of mixtures for each hidden state
m_covarianceType = 'diag'  # covariance type
m_n_iter = 10  # number of iterations
m_bakisLevel = 2


def initByBakis(inumstates, ibakisLevel):
    startprobPrior = np.zeros(inumstates)
    startprobPrior[0: ibakisLevel - 1] = 1/float((ibakisLevel - 1))
    transmatPrior = getTransmatPrior(inumstates, ibakisLevel)
    return startprobPrior, transmatPrior


def getTransmatPrior(inumstates, ibakisLevel):
    transmatPrior = (1 / float(ibakisLevel)) * np.eye(inumstates)

    for i in range(inumstates - (ibakisLevel - 1)):
        for j in range(ibakisLevel - 1):
            transmatPrior[i, i + j + 1] = 1. / ibakisLevel

    for i in range(inumstates - ibakisLevel + 1, inumstates):
        for j in range(inumstates - i - j):
            transmatPrior[i, i + j] = 1. / (inumstates - i)

    return transmatPrior

m_startprobPrior ,m_transmatPrior = initByBakis(m_num_of_HMMStates,m_bakisLevel)

In [57]:
print("StartProbPrior=")
print(m_startprobPrior)

print("TransMatPrior=")
print(m_transmatPrior)

StartProbPrior=
[1. 0. 0.]
TransMatPrior=
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]


In [58]:
class SpeechModel:
    def __init__(self,Class,label):
        self.traindata = np.zeros((0,6))
        self.Class = Class
        self.label = label
        self.model  = hmm.GMMHMM(n_components = m_num_of_HMMStates, n_mix = m_num_of_mixtures, \
                           transmat_prior = m_transmatPrior, startprob_prior = m_startprobPrior, \
                                        covariance_type = m_covarianceType, n_iter = m_n_iter)


In [59]:
len(spoken)

7

In [60]:
#7 GMMHMM Models would be created for 7 words
speechmodels = [None] * 7

for key in gmmhmmindexdict:
    speechmodels[gmmhmmindexdict[key]] = SpeechModel(gmmhmmindexdict[key],key)

for i in range(0,len(m_trainingsetfeatures)):
     for j in range(0,len(speechmodels)):
            if int(speechmodels[j].Class) == int(gmmhmmindexdict[m_trainingsetlabels[i]]):
                speechmodels[j].traindata = np.concatenate((speechmodels[j].traindata , m_trainingsetfeatures[i]))



for speechmodel in speechmodels:
    speechmodel.model.fit(speechmodel.traindata)


print ('Training completed -- 7 GMM-HMM models are built for 7 different types of words')
print (" ")
print(" ")

print("Prediction started")

Training completed -- 7 GMM-HMM models are built for 7 different types of words
 
 
Prediction started


In [61]:
speechmodels[j].traindata.shape

(460, 6)

In [62]:
m_trainingsetfeatures[i].shape

(67, 6)

In [63]:
m_trainingsetfeatures[80:]

(array([[ 18.18681994, -23.37852238, -32.89121343, -15.53199199,
         -15.00164995,   6.15089669],
        [ 20.29778479, -23.73123745, -30.07513332,  -2.75943572,
          -7.36829216,  23.79229755],
        [ 20.96593706, -18.14012329, -29.47788538,  -1.25399801,
          -3.17314227,  23.48739417],
        [ 21.38490391, -13.2887911 , -24.63889802,  -0.5162527 ,
           0.70752506,  16.10882142],
        [ 21.58254135, -19.80232229, -33.27791662, -10.16747969,
           3.99441062,  10.15452757],
        [ 21.89081938, -23.62173261, -35.39824141, -14.04680476,
          -4.02092314,  17.82065419],
        [ 21.51709292, -16.45078204, -27.91676901,  -5.97931312,
          -2.62654089,  20.97191928],
        [ 20.91215221, -13.9821985 , -25.92648505, -18.96561678,
         -17.43031493,  11.79498537],
        [ 19.34161492,  -2.32262634, -25.00998594,  -5.85593567,
         -24.19440645,   2.60070702],
        [ 18.47777424,   0.70486603, -19.04371359, -12.55847747,
        

In [64]:
#Testing
m_PredictionlabelList = []

for i in range(0,len(m_testingsetfeatures)):
    scores = []
    for speechmodel in speechmodels:
         scores.append(speechmodel.model.score(m_testingsetfeatures[i]))
    id  = scores.index(max(scores))
    m_PredictionlabelList.append(speechmodels[id].Class)
    print(str(np.round(scores, 3)) + " " + str(max(np.round(scores, 3))) +" "+":"+ speechmodels[id].label)


[ -974.546  -976.535 -1636.242  -837.858 -1127.844 -1659.116  -952.651] -837.858 :lime
[-1142.37  -1093.2   -1811.364  -807.554 -1280.788 -1889.464  -994.831] -807.554 :lime
[-1199.604 -1090.087 -1969.685  -824.79  -1232.028 -2012.199 -1082.786] -824.79 :lime
[-2303.691 -2692.183 -1475.786 -3720.924 -1378.726 -1073.281 -3054.686] -1073.281 :peach
[-1416.768  -946.316 -2766.12  -1458.454 -1395.129 -2501.859 -1242.871] -946.316 :banana
[-1512.057 -1491.14   -894.085 -1767.204 -1242.924 -1045.671 -1787.072] -894.085 :kiwi
[ -843.419 -1034.166 -1739.253 -1121.187 -1265.27  -1651.367  -914.489] -843.419 :apple
[-1135.958  -989.817 -1837.982  -794.197 -1124.262 -1908.248  -970.704] -794.197 :lime
[-1460.671 -1304.63  -2764.57  -1545.401 -1594.483 -2550.435 -1109.944] -1109.944 :pineapple
[ -721.824 -1025.247 -1751.975 -1184.258 -1317.386 -1604.864  -957.007] -721.824 :apple
[-1596.532 -1644.754  -819.18  -1882.441 -1384.177 -1052.521 -1852.643] -819.18 :kiwi
[-1351.46  -1363.604 -2464.215 -1

In [65]:
accuracy = 0.0
count = 0


print("")
print("Prediction for Testing DataSet:")

for i in range(0,len(m_testingsetlabels)):
    print( "Label"+str(i+1)+":"+m_testingsetlabels[i])
    if gmmhmmindexdict[m_testingsetlabels[i]] == m_PredictionlabelList[i]:
       count = count+1

accuracy = 100.0*count/float(len(m_testingsetlabels))

print("")
print("accuracy ="+str(accuracy))
print("")


Prediction for Testing DataSet:
Label1:lime
Label2:lime
Label3:lime
Label4:peach
Label5:banana
Label6:kiwi
Label7:apple
Label8:lime
Label9:pineapple
Label10:apple
Label11:kiwi
Label12:pineapple
Label13:orange
Label14:peach
Label15:pineapple
Label16:pineapple
Label17:pineapple
Label18:apple
Label19:pineapple
Label20:banana
Label21:pineapple

accuracy =100.0



In [66]:
#Calcuation of  mean ,entropy and relative entropy parameters
'''Entropyvalues for the 3 hidden states and 100 samples'''

def EntropyCalculator(dataarray,meanvalues,sigmavalues):
    entropyvals = []
    for i in range(0,len(dataarray[0])):
        totallogpdf = 0
        entropy = 0
        for j in range(0,len(dataarray)):
            totallogpdf += sp.norm.logpdf(dataarray[j,i],meanvalues[i],sigmavalues[i])
            entropy = (-1*totallogpdf)/len(dataarray)
        entropyvals.append(entropy)
    return entropyvals

'''Relative Entropyvalues for the 6 columns of the given data and sampled values'''
def RelativeEntropyCalculator(givendata,samplesdata,givendatasigmavals,sampledsigmavals,givendatameanvals,sampledmeanvals):

    absgivendatasigmavals =  [abs(number) for number in givendatasigmavals]
    abssampleddatasigmavals = [abs(number) for number in sampledsigmavals]
    relativeentropyvals = []

    for i in range(0,len(givendata[0])):
        totallogpdf = 0
        relativeentropy = 0
        for j in range(0,len(givendata)):
            totallogpdf +=(sp.norm.logpdf(samplesdata[j,i],sampledmeanvals[i],abssampleddatasigmavals[i])- sp.norm.logpdf(givendata[j,i],givendatameanvals[i],absgivendatasigmavals[i]))
            relativeentropy = (-1*totallogpdf)/float(len(givendata))
        relativeentropyvals.append(relativeentropy)
    return relativeentropyvals

cnt = 0

for speechmodel in speechmodels:
    print("For GMMHMM with label:" +speechmodel.label)
    samplesdata,state_sequence = speechmodel.model.sample(n_samples=len(speechmodel.traindata))

    sigmavals =[]
    meanvals  =[]

    for i in range(0, len(speechmodel.traindata[0])):
        sigmavals.append(np.mean(speechmodel.traindata[:, i]))
        meanvals.append(np.std(speechmodel.traindata[:, i]))


    sampledmeanvals = []
    sampledsigmavals =[]



    for i in range(0,len(samplesdata[0])):
        sampledmeanvals.append(np.mean(samplesdata[:,i]))
        sampledsigmavals.append(np.std(samplesdata[:,i]))




    GivenDataEntropyVals = EntropyCalculator(speechmodel.traindata,meanvals,meanvals)
    SampledValuesEntropyVals = EntropyCalculator(samplesdata,sampledmeanvals,sampledsigmavals)
    RelativeEntropy = RelativeEntropyCalculator(speechmodel.traindata,samplesdata,sigmavals,sampledsigmavals,meanvals,sampledmeanvals)

    print("MeanforGivenDataValues:")
    roundedmeanvals = np.round(meanvals, 3)
    print(str(roundedmeanvals))
    print("")

    print("EntropyforGivenDataValues:")
    roundedentropyvals = np.round(GivenDataEntropyVals, 3)
    print(str(roundedentropyvals))
    print("")

    print("MeanforSampleddatavalues:")
    roundedsampledmeanvals = np.round(sampledmeanvals, 3)
    print(str(roundedsampledmeanvals))
    print("")

    print("EntropyforSampledDataValues:")
    roundedsampledentvals = np.round(SampledValuesEntropyVals, 3)
    print(str(roundedsampledentvals))
    print("")

    print("RelativeEntopyValues:")
    roundedrelativeentvals = np.round(RelativeEntropy, 3)
    print(str(roundedrelativeentvals))
    print("")

For GMMHMM with label:apple
MeanforGivenDataValues:
[ 1.968 12.26   9.331 14.402 12.56  12.314]

EntropyforGivenDataValues:
[33.733  4.976 11.708  5.703  6.773  4.158]

MeanforSampleddatavalues:
[ 17.527  -4.857 -27.478 -11.462 -17.214   4.293]

EntropyforSampledDataValues:
[2.124 3.91  3.673 4.046 3.909 3.902]

RelativeEntopyValues:
[-2.066 -6.391 -1.524 -2.636 -1.614 -5.295]

For GMMHMM with label:banana
MeanforGivenDataValues:
[ 2.009  6.903  8.609 10.714  9.223 11.786]

EntropyforGivenDataValues:
[29.166  3.36  13.379  9.894  5.2    4.432]

MeanforSampleddatavalues:
[ 16.782   5.946 -29.445 -26.613  -7.233  -0.094]

EntropyforSampledDataValues:
[2.105 3.353 3.567 3.823 3.609 3.856]

RelativeEntopyValues:
[-2.02900e+00 -3.30000e-02 -1.61300e+00 -1.44300e+00 -2.77500e+00
 -5.16624e+02]

For GMMHMM with label:kiwi
MeanforGivenDataValues:
[ 1.581 13.168 10.591 12.058 20.699 10.533]

EntropyforGivenDataValues:
[48.272  5.356  3.788  5.577  9.097  5.508]

MeanforSampleddatavalues:
[ 16.9