In [1]:
%matplotlib inline

# Colab-only setup: mount Google Drive at /content/drive so the audio files
# read further down (paths under /content/drive/MyDrive) are accessible.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
from scipy.io import wavfile
from scipy.spatial.distance import cdist
import os, random
from sklearn.cluster import KMeans
# Smallest positive normal float64. ViterbiAlgo adds it to probabilities before
# np.log so that zero entries become a very large negative number, not -inf.
bias = np.finfo(0.).tiny
def readData(dire):
    """Load every WAV file directly inside ``dire`` as log-spectrogram frames.

    Parameters
    ----------
    dire : str or path-like
        Directory to scan. The scan is not recursive; entries that are not
        regular files are skipped.

    Returns
    -------
    Data : dict[int, numpy.ndarray]
        Maps a running index (0, 1, ...) to the transposed output of
        ``Spectro`` for that file, i.e. one row per frame.
    title : dict[int, os.DirEntry]
        Maps the same index to the directory entry the features came from.
    """
    Data = {}
    title = {}
    # os.scandir yields entries in arbitrary order and holds an OS directory
    # handle: close it deterministically and sort by name so that the
    # index -> file mapping is the same on every run.
    with os.scandir(dire) as entries:
        files = sorted((entry for entry in entries if entry.is_file()),
                       key=lambda entry: entry.name)
    for i, fname in enumerate(files):
        rate, samples = wavfile.read(fname.path)
        # Copy so each stored matrix owns contiguous memory instead of being a
        # transposed view of the spectrogram.
        Data[i] = np.array(Spectro(rate, samples).T, copy=True)
        title[i] = fname
    return Data, title

def Spectro(rate, samples):
    """Compute a log-magnitude spectrogram of a 1-D signal.

    The signal is cut into 25 ms frames taken every 10 ms. Each frame is
    multiplied by a Hamming window and passed through a 64-point FFT, of which
    the first 32 bins are kept.

    NOTE: ``np.fft.fft(frame, 64)`` crops inputs longer than 64 samples, so only
    the first 64 windowed samples of each frame contribute to the spectrum.
    This matches the original behaviour and is kept unchanged.

    Parameters
    ----------
    rate : int
        Sampling rate in Hz.
    samples : numpy.ndarray
        Signal samples; assumed to be 1-D (mono).

    Returns
    -------
    numpy.ndarray
        Real array of shape ``(32, n_frames)`` holding the natural log of the
        spectral magnitudes. ``n_frames`` is 0 when the signal is shorter than
        one frame.
    """
    n_fft = 64
    n_bins = n_fft // 2
    # Window length follows the sampling rate (400 samples at 16 kHz) instead of
    # being hard-coded, so other rates no longer fail with a shape mismatch.
    frame_len = int(0.025 * rate)
    window = np.hamming(frame_len)

    # Clamp at zero: a signal shorter than one frame yields no frames rather
    # than a negative array dimension.
    total = max(int((len(samples) - rate * 0.025) / (rate * 0.01) + 1), 0)

    spectro = np.zeros((n_bins, total), dtype=complex)
    for i in range(total):
        start = int(i * 0.01 * rate)
        frame = samples[start:start + frame_len]
        spectro[:, i] = np.fft.fft(window * frame, n_fft)[:n_bins]

    # Adding the smallest normal float keeps silent (all-zero) bins finite
    # without changing any non-zero magnitude.
    return np.log(np.absolute(spectro) + np.finfo(float).tiny)

def MyKmeans(x, noOfMix, itr=20):
    """Cluster the rows of ``x`` with a fixed number of Lloyd (k-means) iterations.

    Parameters
    ----------
    x : numpy.ndarray
        2-D array with one sample per row.
    noOfMix : int
        Number of clusters.
    itr : int, optional
        Number of update/assignment iterations to run (default 20).

    Returns
    -------
    numpy.ndarray
        Integer array of length ``x.shape[0]`` giving the cluster index of each
        row.

    Notes
    -----
    Initial centroids are rows of ``x`` chosen with ``np.random.choice``; seed
    NumPy's global generator for reproducible results.
    """
    N = x.shape[0]
    # Draw distinct rows whenever possible: sampling with replacement can pick
    # the same row twice, which leaves a cluster empty and turns its mean into
    # NaN on the first update.
    seeds = np.random.choice(N, noOfMix, replace=noOfMix > N)
    means = np.array(x[seeds, :], dtype=float)
    clusters = cdist(x, means, 'euclidean').argmin(axis=1)

    for _ in range(itr):
        for k in range(noOfMix):
            members = x[clusters == k]
            # An empty cluster keeps its previous centroid instead of becoming NaN.
            if len(members):
                means[k] = members.mean(axis=0)
        clusters = cdist(x, means, 'euclidean').argmin(axis=1)
    return clusters


In [3]:
def ViterbiAlgo(transition, prior, emission, observation):
    """Return the most likely hidden-state sequence of a discrete HMM (Viterbi).

    All computations are done in the log domain; the smallest normal float is
    added to every probability first so that zeros do not become ``-inf``.

    Parameters
    ----------
    transition : numpy.ndarray
        ``(N, N)`` matrix; ``transition[j, i]`` is the probability of moving
        from state ``j`` to state ``i``.
    prior : numpy.ndarray
        Length-``N`` vector of initial state probabilities.
    emission : numpy.ndarray
        ``(N, M)`` matrix; ``emission[i, k]`` is the probability of state ``i``
        emitting symbol ``k``.
    observation : sequence of int
        Observed symbol indices, each in ``range(M)``.

    Returns
    -------
    list of int
        Best state index for every time step; empty if ``observation`` is empty.
    """
    N = transition.shape[0]
    T = len(observation)
    if T == 0:
        return []

    tiny = np.finfo(float).tiny
    log_trans = np.log(transition + tiny)
    log_prior = np.log(prior + tiny)
    log_emit = np.log(emission + tiny)

    prob = np.zeros((N, T))
    backTrack = np.zeros((N, T - 1), dtype=np.int32)

    # Initialise every state (not just the first three), so the routine works
    # for any number of states.
    prob[:, 0] = log_prior + log_emit[:, observation[0]]

    for t in range(1, T):
        for i in range(N):
            # Score of reaching state i at time t from each possible predecessor.
            s = log_trans[:, i] + prob[:, t - 1]
            backTrack[i, t - 1] = np.argmax(s)
            prob[i, t] = log_emit[i, observation[t]] + np.max(s)

    # Trace the best path backwards from the most likely final state.
    FinalSS = [0] * T
    FinalSS[-1] = int(np.argmax(prob[:, -1]))
    for n in range(T - 2, -1, -1):
        FinalSS[n] = int(backTrack[FinalSS[n + 1], n])
    return FinalSS

In [4]:
# HMM definition: 3 hidden states, 8 discrete observation symbols
# (one symbol per k-means cluster fitted below).
prior = np.array([0.5, 0.5, 0.0])
transition = np.array([[0.6, 0.4, 0.0],
                       [0.3, 0.5, 0.2],
                       [0.0, 0.1, 0.9]])
emission = np.array([[0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0],
                     [0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125],
                     [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0]])

#Data preprocessing
# NOTE(review): the two paths disagree ("MyDrive/" vs "MyDrive/MLSP/") - confirm
# that both locations are intended.
SpeechData,speechTitle = readData('/content/drive/MyDrive/SaiManoj Kumar/speech_music_classification/train/speech')
musicData, musicTitle= readData('/content/drive/MyDrive/MLSP/SaiManoj Kumar/speech_music_classification/train/music')

# Stack the frames of every file (speech first, then music) into one matrix,
# remembering which file each run of rows came from. A single vstack avoids
# re-copying the growing matrix on every iteration.
segments = [(SpeechData, key) for key in SpeechData] + [(musicData, key) for key in musicData]
x = np.vstack([store[key] for store, key in segments])

# Vector-quantise all frames into 8 symbols, matching the 8 emission columns.
# NOTE(review): this rebinds the name MyKmeans, which an earlier cell defines as
# a function, to the fitted sklearn model.
MyKmeans = KMeans(n_clusters=8, init='k-means++', random_state=42).fit(x)
y_predict = MyKmeans.labels_

# Replace each file's feature matrix with its own label sequence. Slicing by the
# actual frame count of each file removes the assumption of exactly 40 files
# with 2998 frames each.
start = 0
for store, key in segments:
    stop = start + len(store[key])
    store[key] = y_predict[start:stop]
    start = stop

# Decode one randomly chosen speech file with the HMM defined above.
speechFile = random.choice(list(SpeechData))

observationSpeech = SpeechData[speechFile]
file = speechTitle[speechFile]
finalSS = ViterbiAlgo(transition, prior, emission, observationSpeech)
print(np.array(finalSS),"is the best state sequence for",file,"using the given HMM definition.")

[1 0 0 ... 1 1 1] is the best state sequence for <DirEntry 'greek1.wav'> using the given HMM definition.
