In [2]:
%matplotlib inline

# Mount Google Drive so the dataset paths under /content/drive used by the
# preprocessing cell resolve. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np
from scipy.io import wavfile
from scipy.spatial.distance import cdist
import math
import os
from sklearn.cluster import KMeans
import random

def readData(dire):
    """Load every regular file in a directory as a log-spectrogram.

    Each file is read with ``scipy.io.wavfile.read`` and converted with
    ``Spectro``; the transposed result (one row per frame) is stored under a
    running integer index.

    Parameters
    ----------
    dire : str or path-like
        Directory to scan (not recursive; sub-directories are skipped).

    Returns
    -------
    dict[int, numpy.ndarray]
        ``{0: features_of_first_file, 1: ...}``. Files are indexed in
        name-sorted order so the mapping is the same on every run.
    """
    Data = {}
    # os.scandir yields entries in arbitrary order; sort by name so that index
    # i always refers to the same file, and use the context manager so the
    # directory handle is released promptly.
    with os.scandir(dire) as entries:
        wav_entries = sorted(
            (entry for entry in entries if entry.is_file()),
            key=lambda entry: entry.name,
        )
    for index, entry in enumerate(wav_entries):
        rate, samples = wavfile.read(entry.path)
        # Explicit copy so the stored array does not alias Spectro's buffer.
        Data[index] = np.array(Spectro(rate, samples).T, copy=True)
    return Data

def Spectro(rate, samples, n_fft=64, win_sec=0.025, hop_sec=0.01):
    """Compute a log-magnitude spectrogram of a 1-D signal.

    The signal is cut into overlapping frames, each frame is multiplied by a
    Hamming window and passed through an ``n_fft``-point FFT; the first
    ``n_fft // 2`` bins are kept.

    Parameters
    ----------
    rate : int
        Sampling rate in Hz.
    samples : array-like
        1-D sequence of audio samples.
    n_fft : int, optional
        FFT length (default 64, as in the original implementation).
    win_sec : float, optional
        Frame length in seconds (default 0.025).
    hop_sec : float, optional
        Frame shift in seconds (default 0.01).

    Returns
    -------
    numpy.ndarray
        Real array of shape ``(n_fft // 2, n_frames)`` holding
        ``log(|FFT|)``. ``n_frames`` is 0 when the signal is shorter than
        one frame.

    Notes
    -----
    NOTE(review): when ``n_fft`` is smaller than the frame length,
    ``np.fft.fft`` crops the windowed frame to its first ``n_fft`` samples
    before transforming. That matches the original behaviour and is kept
    as-is; confirm it is intended.
    """
    samples = np.asarray(samples)
    # Window length follows the sampling rate instead of a fixed 400 samples,
    # which was only correct for 16 kHz audio.
    win_len = int(win_sec * rate)
    # Clamp so a too-short signal yields an empty spectrogram rather than a
    # negative dimension error.
    total = max(int((len(samples) - rate * win_sec) / (rate * hop_sec) + 1), 0)
    half = int(n_fft / 2)
    window = np.hamming(win_len)  # loop-invariant, build once
    spectro = np.zeros((half, total), dtype=complex)
    for i in range(total):
        start = int(i * hop_sec * rate)
        frame = samples[start:start + win_len]
        spectro[:, i] = np.fft.fft(window * frame, n_fft)[:half]
    # Floor at the smallest positive normal float so silent frames give a
    # finite (very negative) value instead of -inf.
    magnitude = np.maximum(np.absolute(spectro), np.finfo(float).tiny)
    return np.log(magnitude)


def MyKmeans(x, noOfMix, itr=20):
    """Cluster the rows of ``x`` with plain Lloyd's k-means.

    Parameters
    ----------
    x : numpy.ndarray
        2-D array, one sample per row.
    noOfMix : int
        Number of clusters.
    itr : int, optional
        Maximum number of update iterations (default 20).

    Returns
    -------
    numpy.ndarray
        1-D integer array with the cluster index of every row of ``x``.

    Notes
    -----
    Initial centroids are drawn from the rows of ``x`` using the global
    ``np.random`` state, so seed it for reproducible results.
    """
    x = np.asarray(x)
    N = x.shape[0]
    # Draw distinct rows as seeds: sampling with replacement can pick the same
    # row twice, which leaves a cluster empty and turns its mean into NaN.
    # Fall back to replacement only when more clusters than rows are requested.
    seeds = np.random.choice(N, noOfMix, replace=noOfMix > N)
    means = np.array(x[seeds, :], dtype=float)
    clusters = np.argmin(cdist(x, means, 'euclidean'), axis=1)
    for _ in range(itr):
        for k in range(noOfMix):
            members = x[clusters == k]
            # An empty cluster keeps its previous centroid instead of NaN.
            if len(members):
                means[k] = members.mean(axis=0)
        updated = np.argmin(cdist(x, means, 'euclidean'), axis=1)
        if np.array_equal(updated, clusters):
            break  # assignments are stable; further iterations change nothing
        clusters = updated
    return clusters

# Hand-specified discrete HMM: 3 hidden states, 8 observation symbols.
# prior[i]         - probability of starting in state i
# transition[i][j] - probability of moving from state i to state j
# emission[i][k]   - probability of state i emitting symbol k
prior = np.array([0.5, 0.5, 0.0])
transition = np.array([
    [0.6, 0.4, 0.0],
    [0.3, 0.5, 0.2],
    [0.0, 0.1, 0.9],
])
mat = [
    [0.25] * 4 + [0.0] * 4,
    [0.125] * 8,
    [0.0] * 5 + [0.5, 0.5, 0.0],
]
emission = np.array(mat)




In [4]:
#Preprocessing the data
# Per-file spectrogram features (one row per frame), keyed by file index.
speech_features = readData('/content/drive/MyDrive/SaiManoj Kumar/speech_music_classification/train/speech')
music_features = readData('/content/drive/MyDrive/SaiManoj Kumar/speech_music_classification/train/music')

speech_frames = [speech_features[k] for k in sorted(speech_features)]
music_frames = [music_features[k] for k in sorted(music_features)]

# Stack all frames once (speech first, then music) instead of growing the
# array with a vstack per file.
x = np.vstack(speech_frames + music_frames)

# Quantise every frame into one of 8 symbols, matching the 8 columns of the
# `emission` matrix. The fitted model gets its own name so it no longer
# shadows the MyKmeans() function defined earlier in the notebook.
kmeans_model = KMeans(n_clusters=8, init='k-means++', random_state=42).fit(x)
y_predict = kmeans_model.labels_

# Cut the flat label vector back into one symbol sequence per file, using the
# actual frame count of each file rather than assuming 40 files x 2998 frames.
SpeechData = {}
musicData = {}
offset = 0
for label_store, frame_list in ((SpeechData, speech_frames), (musicData, music_frames)):
    for file_index, feats in enumerate(frame_list):
        label_store[file_index] = y_predict[offset:offset + len(feats)]
        offset += len(feats)
assert offset == len(y_predict), "label sequences must cover every frame exactly once"

# Pick one speech and one music file to score. A dedicated seeded generator
# keeps the choice reproducible without touching the global `random` state.
_file_picker = random.Random(42)
speechFile = _file_picker.randrange(len(SpeechData))
musicFile = _file_picker.randrange(len(musicData))

In [5]:
#Probability estimation using Forward Algorithm
# Symbol sequences of the two files chosen in the preprocessing cell.
observationSpeech = SpeechData[speechFile]
observationMusic = musicData[musicFile]

def forward(observation, init_probs=None, trans_probs=None, emit_probs=None):
    """Scaled forward algorithm for a discrete HMM.

    Parameters
    ----------
    observation : array-like of int
        Sequence of observation symbol indices.
    init_probs, trans_probs, emit_probs : array-like, optional
        Initial-state vector, state-transition matrix and emission matrix.
        When omitted, the notebook-level ``prior``, ``transition`` and
        ``emission`` arrays are used.

    Returns
    -------
    tuple
        ``(log_likelihood, c)`` where ``c[t]`` is the scaling factor applied
        at step ``t`` (the reciprocal of the un-normalised alpha sum) and
        ``log_likelihood == -sum(log(c))``. If the sequence has zero
        probability, ``log_likelihood`` is ``-inf`` and the scaling factors
        from the failing step onwards are ``inf``.
    """
    init = np.asarray(prior if init_probs is None else init_probs, dtype=float)
    trans = np.asarray(transition if trans_probs is None else trans_probs, dtype=float)
    emit = np.asarray(emission if emit_probs is None else emit_probs, dtype=float)
    obs = np.asarray(observation)

    T = obs.shape[0]
    c = np.zeros(T)
    alpha = init
    for t in range(T):
        if t == 0:
            alpha = init * emit[:, obs[0]]
        else:
            # Builds a fresh array every step. The previous implementation
            # aliased the "current" and "next" buffers, so from t = 2 onward
            # it overwrote alpha values it was still reading.
            alpha = (alpha @ trans) * emit[:, obs[t]]
        total = alpha.sum()
        if total == 0:
            # No state can explain obs[t]: the sequence is impossible.
            c[t:] = np.inf
            return float('-inf'), c
        c[t] = 1.0 / total
        alpha = alpha * c[t]
    return 0.0 - float(np.sum(np.log(c))), c


# Score both clips with the forward pass; the scaling vectors are kept
# because the backward pass takes them as input.
finalprob_Speech, scalingSpeech = forward(observationSpeech)
finalprob_Music, scalingMusic = forward(observationMusic)
print(
    "Speech file is more likely under forward algorithm model"
    if finalprob_Speech > finalprob_Music
    else "Music file is more likely under forward algorithm model"
)

Speech file is more likely under forward algorithm model


In [6]:
#Probability estimation using Backward Algorithm
# Same 3-state / 8-symbol HMM parameters as defined earlier in the notebook,
# restated here so the backward cell shows the model it evaluates.
prior = np.array([0.5, 0.5, 0.0])
transition = np.array([
    [0.6, 0.4, 0.0],
    [0.3, 0.5, 0.2],
    [0.0, 0.1, 0.9],
])
mat = [
    [0.25, 0.25, 0.25, 0.25] + [0.0, 0.0, 0.0, 0.0],
    [0.125 for _ in range(8)],
    [0.0, 0.0, 0.0, 0.0, 0.0] + [0.5, 0.5] + [0.0],
]
emission = np.array(mat)


def backward(observation, scaling, init_probs=None, trans_probs=None, emit_probs=None):
    """Scaled backward algorithm for a discrete HMM.

    Parameters
    ----------
    observation : array-like of int
        Sequence of observation symbol indices.
    scaling : array-like of float
        One scaling factor per time step; the backward variable is
        multiplied by ``scaling[t]`` at every step ``t``.
    init_probs, trans_probs, emit_probs : array-like, optional
        Initial-state vector, state-transition matrix and emission matrix.
        When omitted, the notebook-level ``prior``, ``transition`` and
        ``emission`` arrays are used.

    Returns
    -------
    float
        ``sum_i init[i] * emit[i, obs[0]] * beta_0[i]`` computed with the
        scaled beta, i.e. the sequence probability multiplied by
        ``prod(scaling)``. With ``scaling`` all ones this is the plain
        sequence probability. For an empty sequence the total prior mass is
        returned.
    """
    init = np.asarray(prior if init_probs is None else init_probs, dtype=float)
    trans = np.asarray(transition if trans_probs is None else trans_probs, dtype=float)
    emit = np.asarray(emission if emit_probs is None else emit_probs, dtype=float)
    obs = np.asarray(observation)

    T = obs.shape[0]
    if T == 0:
        return float(init.sum())

    beta = np.ones(init.shape[0]) * scaling[T - 1]
    for t in range(T - 2, -1, -1):
        # Builds a fresh array every step. The previous implementation
        # aliased the "current" and "next" buffers, so after the first
        # recursion step it overwrote beta values it was still reading.
        beta = (trans @ (emit[:, obs[t + 1]] * beta)) * scaling[t]
    return float(np.sum(init * emit[:, obs[0]] * beta))


finalprob_Speech = backward(observationSpeech, scalingSpeech)
finalprob_Music = backward(observationMusic, scalingMusic)


def _unscaled_log_prob(scaled_value, scaling):
    """Turn backward()'s scaled result into a log-probability.

    backward() multiplies its recursion by ``scaling[t]`` at every step, so
    its return value is the sequence probability times ``prod(scaling)``.
    Dividing that product out in the log domain gives the log-probability.
    Non-positive or NaN inputs map to ``-inf``.
    """
    if not scaled_value > 0:
        return float('-inf')
    return math.log(scaled_value) - float(np.sum(np.log(scaling)))


# Compare log-likelihoods rather than the raw scaled values: each file was
# scaled by its own factors, so the raw values are not comparable.
loglik_Speech = _unscaled_log_prob(finalprob_Speech, scalingSpeech)
loglik_Music = _unscaled_log_prob(finalprob_Music, scalingMusic)

if loglik_Speech > loglik_Music:
    print("Speech file is more likely under backward algorithm model")
else:
    print("Music file is more likely under backward algorithm model")

Speech file is more likely under backward algorithm model
