In [1]:
import os
import argparse
import numpy as np
from scipy.io import wavfile
import features as mfcc
from hmmlearn import hmm
import sklearn.mixture as skm

In [2]:
def loadtrainingfeatures(input_folder):

    digit_features = {}

    for dirname in os.listdir(input_folder):

            digit_folder = os.path.join(input_folder,dirname)

            if not os.path.isdir(digit_folder):
                continue

            all_speaker_features  = []

            for speaker in os.listdir(digit_folder):

                speaker_folder =  os.path.join(digit_folder,speaker)


                features = np.asarray(())

                for filename in [x for x in os.listdir(speaker_folder) if x.endswith('.wav')][:5]:

                    filepath = os.path.join(speaker_folder,filename)
                    sampling_rate, audio_signal = wavfile.read(filepath)

                    feat = mfcc.getMFCC(audio_signal,sampling_rate)

                    if features.size == 0:
                        features = feat
                    else:
                        features = np.vstack((features, feat))

                all_speaker_features.append(features)

            digit_features[dirname] = all_speaker_features
    return digit_features

In [3]:
train_folder = "../free-digit-recordings/train_folder"
digit_features = loadtrainingfeatures(train_folder)

In [15]:
all_digit_UBM = {}

for digit in digit_features:
    digit_hmm = hmm.GMMHMM(n_components=8, n_iter=100, covariance_type='diag')
    digit_gmms = []
    for speaker in digit_features[digit]:
        speaker_gmm = skm.GaussianMixture(n_components=8,covariance_type='diag')
        speaker_gmm.fit(speaker)
        digit_gmms.append(speaker_gmm)
    
    digit_hmm.gmm_ = digit_gmms
    
    for speaker in digit_features[digit]:
        digit_hmm.fit(speaker)
    all_digit_UBM[digit] = digit_hmm
        

TypeError: only integer scalar arrays can be converted to a scalar index

In [5]:
test_folder = "../free-digit-recordings/test_folder"
 
speakers = {}

for dirname in os.listdir(test_folder):

            speaker_folder = os.path.join(test_folder,dirname)
            
            speaker_features = {}

            if not os.path.isdir(speaker_folder):
                continue

            for digit in os.listdir(speaker_folder):

                digit_folder =  os.path.join(speaker_folder,digit)
                
                digit_features = []

                features = np.asarray(())

                for filename in [x for x in os.listdir(digit_folder) if x.endswith('.wav')][:5]:

                    filepath = os.path.join(digit_folder,filename)
                    sampling_rate, audio_signal = wavfile.read(filepath)

                    feat = mfcc.getMFCC(audio_signal,sampling_rate)

                    if features.size == 0:
                        features = feat
                    else:
                        features = np.vstack((features, feat))

                digit_features.append(features)
                speaker_features[digit] = digit_features
            speakers[dirname] = speaker_features

In [13]:
speaker_models = {}
for s in speakers:
    sp_digit_model = {}
    for d in speakers[s]:
        
        x = speakers[s][d]
        model = all_digit_UBM[d].predict(x[0])
        sp_digit_model[d] = model
        
        #would save ubm for speaker here:
    speaker_models[s] = sp_digit_model


        
        
        

In [14]:
speaker_models['george']

{'0': array([2, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 0,
        0, 0, 0, 7, 7, 7, 7, 3, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 5, 0, 0,
        0, 0, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 4, 4, 4, 4, 4, 5, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 3, 2, 2, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 5, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 3, 3, 2, 6, 6, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 5, 0, 0, 0, 7, 7, 7, 7,
        7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]),
 '1': array([3, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 4, 4, 

In [None]:
fakeutterance = ['0','5','6','7','9']
george = {'george': []}
jackson = {}
for 

In [None]:
def getI_vector(N,F,C,T,t_dim):
    
  I = np.eye(t_dim);
  B =  np.dot(np.linalg.pinv(T),S)    
  cov_i = I + ((B*N).dot(T))
  V = np.dot(B,F)
  ivector = np.dot(np.linalg.pinv(cov_i) ,V)
  return ivector