# Hidden Markov Model 

In [922]:
# imports
import numpy as np
import os
import librosa

In [923]:
class dataset():
    def __init__(self,folder):
        self.folder = folder
        self.phones = os.listdir(folder)
        self.sr = 8000 # sampling rate of given audio files
        self.window = int(25*1e-3 * self.sr) # 25ms window
        self.overlap = int(10*1e-3 * self.sr) # 10ms overlap
        self.hop = self.window -  self.overlap
        
    def get_features(self, wav_path):
        y, self.sr = librosa.core.load(path=wav_path, sr=None, mono=True, duration=0.5)
        num_windows = len(y)//self.window
        x = librosa.feature.mfcc(y=y, sr=self.sr, n_mfcc=num_windows, n_fft=self.window, hop_length=len(y)//(13-1), n_mels=64) # x.shape = [num_windows, 13]
        return x

    def __call__(self):
        train_wavs = np.random.choice(self.phones, 32, replace=False) # randomly pick 32 wav files for training among 36 files
        test_wavs  = np.setdiff1d(self.phones, train_wavs, assume_unique=True) # take the rest 4 wav files as test data
        train, test = [], [] 
        train = np.array([self.get_features(os.path.join(self.folder, wav)) for wav in train_wavs])
        test  = np.array([self.get_features(os.path.join(self.folder, wav)) for wav in  test_wavs])
        # normalize the data
        train,test = train[:,1:,:], test[:,1:,:]
        train = train/np.std(train,axis=0) - np.mean(train)
        test  = test/np.std(test,axis=0)   - np.mean(test)
        return train, test

In [924]:
train, test = dataset('che/')()
train.shape, test.shape

((32, 19, 13), (4, 19, 13))

In [925]:
def gaussian(x, mu, sigma):
    d = len(x)
    px = 1/np.sqrt(np.linalg.det(sigma) * ((2*np.pi)**d))
    px *= np.exp(-0.5*np.matmul(np.matmul(np.transpose(x-mu), np.linalg.inv(sigma)), x-mu))
    return px

In [945]:
from Kmeans import Kmeans # Kmeans from previous assignment
class HMM():
    def __init__(self, K, trainset):
        self.K = K # states
        self.X = trainset
        self.M, self.N, self.d = self.X.shape # M:number of sound files, N=states, d=13 
        # initializing parameters
        self.pi = np.random.uniform(high=1, low=0, size=self.K)
        self.pi /= np.sum(self.pi)
        self.A = np.triu(np.random.uniform(high=1, low=0, size=(self.K, self.K)))
        self.A = np.transpose(self.A.T/np.sum(self.A, axis=1))
        #initalization using kmeans
        kmeans = Kmeans(X=self.X[0], K=self.K)
        kmeans.cluster(n_iter=2)
        self.means = kmeans.means # initialize means
        # covariances are initalized by finding covariances of each cluster
        mixtures = kmeans.get_clusters()
        # print(mixtures[0].shape)
        self.covars = np.zeros((self.K, self.d, self.d))
        for k in range(self.K):
            self.covars[k] = np.cov(mixtures[k], rowvar=False)
        #intialize arrays
        self.alpha = self.beta = self.gamma = self.emission = np.zeros((self.N, self.K)) # shape: (N,k)
        self.Q = np.random.normal(0,1)
    
    def maximization(self, X):
        # emission probabilities
        for n,x in enumerate(X):
            for k in range(self.K):
                self.emission[n][k] = gaussian(np.asarray(x), self.means[k], np.asarray(self.covars[k]))
        # alpha - forward 
        self.alpha[0] = self.emission[0] * self.pi
        for n in range(1,self.N):
            self.alpha[n] = self.emission[n] * np.matmul(self.alpha[n-1], self.A)
        # beta - backward
        self.beta[self.N-1] = np.ones(self.K) # self.beta.shape: (N,k)
        for n in range(self.N-2, -1, -1):
            self.beta[n] = np.matmul(self.A, self.emission[n+1]*self.beta[n+1])
        self.pX = np.sum(self.alpha[self.N-1])
        # gamma 
        self.gamma = self.alpha * self.beta / self.pX
        self.Zeta = np.array([self.emission[n]*(self.alpha[n]*(self.A*self.beta[n]).T).T  for n in range(self.N)]) / self.pX
        # update parameters
        new_pi = self.gamma[0]/np.sum(self.gamma[0])
        new_A = np.sum(self.Zeta, axis=0) / np.sum(np.sum(self.Zeta, axis=0), axis=1)
        new_covars, new_means = np.zeros((self.K, self.d, self.d)), np.zeros((self.K, self.d))
        for k in range(self.K):
            # ck = np.matmul(X[n]-self.means[k], np.transpose(X[n]-self.means[k]))
            new_covars[k] = np.sum([self.gamma[n][k]*np.tensordot(X[n]-self.means[k], X[n]-self.means[k], axes=0) for n in range(self.N)], axis=0) / np.sum(self.gamma[:,k], axis=0)
            #print(np.linalg.det(self.covars[k]))
            new_means[k] = np.sum([self.gamma[n][k]*X[n] for n in range(self.N)], axis=0) / np.sum(self.gamma[:,k], axis=0)
            
        return [new_pi, new_A, new_means, new_covars]
        
    def likelihood(self, X):
        return np.sum(self.alpha[self.N-1]) # beta(ZN)=1
    
    def expectation(self):
        # expectation step
        Q = np.sum(self.gamma[0] * self.pi) + np.sum(self.Zeta*self.A) + np.sum(self.gamma * self.emission)
        return Q
    
    def train(self, threshold):
        # EM iterations to train the model
        [self.pi, self.A, self.means, self.covars] =  self.maximization(self.X[0])
        self.new_Q = self.expectation()
        print('log likelihood: ', self.Q)
        print('Error:', self.new_Q-self.Q)
#         while np.abs(self.new_Q - self.Q) > threshold:
        count=1
        while np.abs(self.Q - self.new_Q) > threshold:
            cnt=0
            print(self.Q, self.new_Q)
            for X in self.X:
                cnt+=1
                self.Q = self.new_Q
                [self.pi, self.A, self.means, self.covars] =  self.maximization(X)
                self.new_Q = self.expectation()
                print('batch_count', cnt)
                print('log likelihood: ', self.new_Q)
                print('Error: ', np.abs(self.new_Q-self.Q))
                break
            count +=1
            print('Epoch:', count)
            
    def get_params(self):
        return {'means':self.means, 'covars':self.covars, 'pi':self.pi, 'transitions':self.A}
    

In [956]:
che, che_test = dataset('che/')()
print(che.shape)
hmm1 = HMM(K=3, trainset=che)
hmm1.train(1e-10)


(32, 19, 13)
log likelihood:  0.3034576874039704
Error: 31.157530196841506
0.3034576874039704 31.460987884245476
batch_count 1
log likelihood:  31.460987884245533
Error:  5.684341886080802e-14
Epoch: 2


  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
