In [145]:
# #Functions used:
# 1. readData():
#     This is used to read the data from the audio files
# 2. Spectro():
#     This is used to extract the spectrogram from the read data
# 3. Gaussian():
#     This is used to calculate the gaussian probability density value for the given data samples
# 4. AssociationProbability():
#     This is used to calculate the association probability
# 5. LogLikelihood():
#     This is used to calculate the log likelihood
# 6. MyKmeans():
#     This is the Kmeans Algorithm for initialization
# 7. EM_GaussianMixture():
#     This implements the EM algorithm

In [None]:
import numpy as np
from scipy.io import wavfile
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import copy
import math
import os

def readData(dire):
    """Read every file in a directory and stack the spectrogram frames.

    Each regular file directly inside ``dire`` is loaded with
    ``wavfile.read``, converted with ``Spectro`` and transposed, so that the
    frames of all files become consecutive rows of a single array.

    Args:
        dire: Path of the directory to scan (sub-directories are skipped).

    Returns:
        The row-wise stack of all per-file transposed spectrograms, or an
        empty ``np.array([])`` when the directory holds no files.
    """
    per_file_frames = [
        Spectro(*wavfile.read(entry.path)).T
        for entry in os.scandir(dire)
        if entry.is_file()
    ]
    if not per_file_frames:
        return np.array([])
    return np.vstack(per_file_frames)

def Spectro(rate, samples):
    """Compute a log-magnitude spectrogram of an audio signal.

    The signal is cut into frames of 25 ms taken every 10 ms. Each frame is
    multiplied by a Hamming window of the same length and passed to a
    64-point FFT, of which the first 32 bins are kept.

    Args:
        rate: Sampling rate in Hz.
        samples: 1-D array of audio samples.

    Returns:
        Real array of shape ``(32, total)`` holding ``log(|FFT|)``, one
        column per frame. ``total`` is 0 when the signal is shorter than
        one frame.
    """
    n_fft = 64
    n_bins = n_fft // 2
    frame_len = int(0.025 * rate)
    # Clamp at zero so a too-short signal yields an empty spectrogram
    # instead of a negative array dimension.
    total = max(0, int((len(samples) - rate * 0.025) / (rate * 0.01) + 1))
    # The window has to match the frame length; a fixed 400-sample window is
    # only correct for 16 kHz audio and fails to broadcast at other rates.
    window = np.hamming(frame_len)
    spectro = np.zeros((n_bins, total), dtype=complex)
    for i in range(total):
        start = int(i * 0.01 * rate)
        # NOTE: with n=64 numpy crops the windowed frame to its first 64
        # samples before transforming it.
        value = np.fft.fft(window * samples[start:start + frame_len], n_fft)
        spectro[:, i] = value[0:n_bins]
    return np.log(np.absolute(spectro))

def Gaussian(avg, Cov, vector):
    """Evaluate a multivariate normal density at one point.

    Args:
        avg: Mean vector of the distribution.
        Cov: Square covariance matrix.
        vector: Point at which the density is evaluated.

    Returns:
        The density value as a Python float.

    Raises:
        numpy.linalg.LinAlgError: If ``Cov`` cannot be inverted.
        ValueError: If the determinant of ``Cov`` is negative.
    """
    # Flatten so that both (d,) and (1, d) inputs give a scalar quadratic form.
    diff = np.ravel(np.asarray(vector, dtype=float) - np.asarray(avg, dtype=float))
    dim = diff.shape[0]
    # (2*pi)^(d/2) * sqrt(|Cov|); the exponent follows the data dimension
    # instead of being fixed to 16, and pi is not truncated to 3.14.
    norm = ((2 * math.pi) ** (dim / 2)) * math.sqrt(np.linalg.det(Cov))
    if norm == 0:
        # Avoid dividing by zero when the determinant underflows.
        norm = 0.0000000001
    mahalanobis = float(np.dot(diff, np.dot(np.linalg.inv(Cov), diff)))
    return math.exp(-0.5 * mahalanobis) / norm

def AssociationProbability(d, a, mean, cov):
    """Compute per-component likelihoods and posterior responsibilities.

    Args:
        d: 2-D array with one sample per row.
        a: Sequence of mixture weights, one per component.
        mean: Indexable collection of component means (``mean[k]``).
        cov: Dict mapping component index to its covariance matrix.

    Returns:
        A pair ``(probx1, prob1x)`` of dicts keyed by component index, each
        value being a list with one entry per sample:

        * ``probx1[k][j]`` - posterior probability of component ``k`` given
          sample ``j``: ``a[k] * p(x_j | k) / sum_m a[m] * p(x_j | m)``.
        * ``prob1x[k][j]`` - the component density ``p(x_j | k)``.

    Raises:
        ZeroDivisionError: If the mixture density of a sample is exactly 0.
    """
    n_samples = d.shape[0]
    n_components = len(a)

    # Likelihood of every sample under every component.
    prob1x = {
        k: [Gaussian(mean[k], cov.get(k), d[j]) for j in range(n_samples)]
        for k in range(n_components)
    }

    # Mixture density of each sample (the normaliser of the posterior).
    summ = [
        sum(a[k] * prob1x[k][j] for k in range(n_components))
        for j in range(n_samples)
    ]

    # The numerator must carry the mixture weight as well, otherwise the
    # values for one sample do not sum to 1 across components.
    probx1 = {
        k: [a[k] * prob1x[k][j] / summ[j] for j in range(n_samples)]
        for k in range(n_components)
    }
    return probx1, prob1x

def LogLikelihood(alpha, prob1x, n):
    """Return the log-likelihood of the first ``n`` samples under a mixture.

    Args:
        alpha: Sequence of mixture weights, one per component.
        prob1x: Dict mapping component index ``j`` to a list whose ``i``-th
            entry is the density of sample ``i`` under component ``j``.
        n: Number of samples to include.

    Returns:
        Sum over the samples of ``log(sum_j alpha[j] * prob1x[j][i])``.

    Raises:
        ValueError: If the mixture density of a sample is 0 (log of zero).
    """
    # Take the component count from the weights that were passed in rather
    # than from a module-level global.
    n_components = len(alpha)
    total = 0
    for i in range(n):
        mixture = sum(alpha[j] * prob1x.get(j)[i] for j in range(n_components))
        total += math.log(mixture)
    return total


def MyKmeans(x, noOfMix, itr):
    """Cluster the rows of ``x`` with K-means and return the labels.

    Initial centres are rows of ``x`` chosen at random via ``np.random``.

    Args:
        x: 2-D array with one sample per row.
        noOfMix: Number of clusters.
        itr: Number of update iterations to run after the initial assignment.

    Returns:
        1-D integer array with the cluster index of every row of ``x``.
    """
    N = x.shape[0]
    # Draw distinct seed rows whenever possible: sampling with replacement
    # can pick the same row twice, which leaves a cluster empty.
    seeds = np.random.choice(N, noOfMix, replace=N < noOfMix)
    means = np.array(x[seeds, :], dtype=float)
    clusters = np.argmin(cdist(x, means, 'euclidean'), axis=1)
    for _ in range(itr):
        for k in range(noOfMix):
            members = x[clusters == k]
            # An empty cluster keeps its previous centre; averaging zero
            # rows would produce NaN and poison all later distances.
            if members.shape[0] > 0:
                means[k] = members.mean(axis=0)
        clusters = np.argmin(cdist(x, means, 'euclidean'), axis=1)
    return clusters

def EM_GaussianMixture(d, noOfMix, cov_type):
    """Fit a Gaussian mixture model to ``d`` with the EM algorithm.

    Parameters are initialised from a hard K-means assignment
    (``MyKmeans`` with 10 iterations) and then refined with 20 EM
    iterations. The log-likelihood after initialisation and after every
    iteration is plotted with matplotlib before returning.

    Args:
        d: 2-D array with one sample per row.
        noOfMix: Number of mixture components.
        cov_type: ``'diag'`` keeps only the diagonal of each covariance
            matrix; any other value keeps the full matrix.

    Returns:
        Tuple ``(alpha, centroids, Cov)``:

        * ``alpha`` - list of mixture weights.
        * ``centroids`` - array of shape ``(noOfMix, d.shape[1])`` of means.
        * ``Cov`` - dict mapping component index to its covariance matrix.

    Raises:
        ValueError: If K-means leaves a component without any sample.
    """
    n_samples, n_features = d.shape
    is_diag = cov_type == 'diag'
    clusters = MyKmeans(d, noOfMix, 10)

    # ---- Initialisation from the hard K-means assignment ----
    alpha = []
    # Zero-initialised: np.empty would leave arbitrary memory contents in
    # any row that is not fully overwritten.
    centroids = np.zeros((noOfMix, n_features), dtype=float)
    Cov = {}
    for k in range(noOfMix):
        members = d[clusters == k]
        count = members.shape[0]
        if count == 0:
            raise ValueError(
                "K-means initialisation produced an empty cluster for "
                "component %d" % k
            )
        # Weight is the fraction of samples (rows), not of features.
        alpha.append(count / n_samples)
        centroids[k] = members.mean(axis=0)
        diff = members - centroids[k]
        cov_k = np.dot(diff.T, diff) / count
        Cov[k] = np.diag(np.diag(cov_k)) if is_diag else cov_k

    probxi, probix = AssociationProbability(d, alpha, centroids, Cov)
    FinalLogLikelihood = [LogLikelihood(alpha, probix, n_samples)]

    # ---- EM iterations ----
    for _ in range(20):
        # Responsibilities as a (component, sample) matrix.
        resp = np.array(
            [probxi.get(k) for k in range(noOfMix)], dtype=float
        )
        sumProb = resp.sum(axis=1)

        # Mixture weights.
        alpha = [float(s) / n_samples for s in sumProb]

        # Means: responsibility-weighted average of the samples.
        centroids = np.dot(resp, d) / sumProb[:, np.newaxis]

        # Covariances: responsibility-weighted scatter around the new means.
        Cov = {}
        for k in range(noOfMix):
            diff = d - centroids[k]
            cov_k = np.dot((resp[k][:, np.newaxis] * diff).T, diff) / sumProb[k]
            Cov[k] = np.diag(np.diag(cov_k)) if is_diag else cov_k

        probxi, probix = AssociationProbability(d, alpha, centroids, Cov)
        FinalLogLikelihood.append(LogLikelihood(alpha, probix, n_samples))

    # Plot the recorded log-likelihood curve (index 0 is the initialisation).
    plt.plot(list(range(len(FinalLogLikelihood))), FinalLogLikelihood)
    plt.show()
    return alpha, centroids, Cov


# Model configuration shared by training and evaluation.
noOfMix, cov_type = 2, "full"

# Fit the mixture model for the speech class.
SpeechData = readData('speech_music_classification/train/speech')
print("Speech files have been read")
sAlpha, sMean, sCov = EM_GaussianMixture(
    d=SpeechData, noOfMix=noOfMix, cov_type=cov_type
)
print("Speech model has been learnt")

# Fit the mixture model for the music class.
MusicData = readData('speech_music_classification/train/music')
print("Music files have been read")
mAlpha, mMean, mCov = EM_GaussianMixture(
    d=MusicData, noOfMix=noOfMix, cov_type=cov_type
)
print("Music model has been learnt")

In [None]:
print("Testing the samples")
# Load every test file: one spectrogram (frames as rows) and one label each.
testData = []
orginalLabel = []
with os.scandir('speech_music_classification/test') as test_entries:
    for fname in test_entries:
        if fname.is_file():
            rate, samples = wavfile.read(fname.path)
            testData.append(Spectro(rate, samples).T)
            # Take the label from the file name itself; str() of a DirEntry
            # is its repr ("<DirEntry '...'>"), not the name.
            orginalLabel.append(fname.name.split("_")[0])
N = len(testData)

# Classify each file by majority vote over its frames.
predictionLabel = []
for frames in testData:
    # Number of frames for which the music model is more likely.
    FrameCount = 0
    for frame in frames:
        musicProb = 0
        speechProb = 0
        for j in range(noOfMix):
            speechProb += sAlpha[j] * Gaussian(sMean[j], sCov.get(j), frame)
            musicProb += mAlpha[j] * Gaussian(mMean[j], mCov.get(j), frame)
        if musicProb > speechProb:
            FrameCount += 1
    # Ties go to "music", as does a file without frames.
    if FrameCount < (frames.shape[0] - FrameCount):
        predictionLabel.append("speech")
    else:
        predictionLabel.append("music")

# A prediction counts as correct when it occurs in the file-name label.
successCount = 0
for predicted, actual in zip(predictionLabel, orginalLabel):
    if predicted in actual:
        successCount += 1

if N == 0:
    # Nothing to score; avoid dividing by zero below.
    print("No test files found, error rate is undefined")
else:
    print("Error Rate for  ",noOfMix ," -mixture components with ",cov_type,"Covariance:", ((N-successCount) / N) * 100)