In [5]:
#9


import os
from glob import glob

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm


def parse_free_digits(directory):
    """Load every ``*.wav`` recording found directly inside ``directory``.

    File names are expected to follow ``<digit>_<speaker>_<index>.wav``.

    Args:
        directory: Folder that contains the recordings.

    Returns:
        Tuple ``(wavs, Fs, ids, y, speakers)`` where
        ``wavs`` is the list of loaded waveforms (native sampling rate),
        ``Fs`` is the sampling rate of the first file in sorted order,
        ``ids`` are the recording-index strings taken from the file names,
        ``y`` are the integer digit labels and
        ``speakers`` are the speaker names.

    Raises:
        FileNotFoundError: If ``directory`` contains no ``*.wav`` file.
    """
    # Sorting makes the order of the returned lists reproducible; glob gives
    # no ordering guarantee.
    files = sorted(glob(os.path.join(directory, "*.wav")))

    if not files:
        raise FileNotFoundError(
            "No .wav files found in directory: {!r}".format(directory)
        )

    # Use os.path instead of splitting on a literal backslash, so parsing
    # follows the platform's path rules and works at any directory depth.
    fnames = [
        os.path.splitext(os.path.basename(f))[0].split("_") for f in files
    ]

    ids = [parts[2] for parts in fnames]
    y = [int(parts[0]) for parts in fnames]
    speakers = [parts[1] for parts in fnames]

    # Read all wavs at their native sampling rate (sr=None); the sampling
    # rate reported for the first file is the one returned to the caller.
    wavs = []
    Fs = None

    for f in files:
        wav, sr = librosa.load(f, sr=None)

        if Fs is None:
            Fs = sr
        wavs.append(wav)

    # Print dataset info
    print("Total wavs: {}. Fs = {} Hz".format(len(wavs), Fs))

    return wavs, Fs, ids, y, speakers


def extract_features(wavs, n_mfcc=6, Fs=8000):
    """Compute MFCC features for every waveform in ``wavs``.

    Args:
        wavs: Iterable of waveforms (as loaded by librosa).
        n_mfcc: Number of MFCC coefficients to keep per frame.
        Fs: Sampling rate of the waveforms in Hz.

    Returns:
        List with one array per waveform: the transposed output of
        ``librosa.feature.mfcc`` (librosa documents that output as
        ``(n_mfcc, n_frames)``, so each row here is one frame).
    """
    # 30 ms analysis window expressed in samples.
    window = 30 * Fs // 1000
    step = window // 2
    # Consecutive windows overlap by `step` samples.
    hop = window - step

    frames = [
        # Audio and sampling rate are passed by keyword: recent librosa
        # releases make them keyword-only, so the positional form fails.
        librosa.feature.mfcc(
            y=wav, sr=Fs, n_fft=window, hop_length=hop, n_mfcc=n_mfcc
        ).T

        for wav in tqdm(wavs, desc="Extracting mfcc features...")
    ]

    print("Feature extraction completed with {} mfccs per frame".format(n_mfcc))

    return frames


def split_free_digits(frames, ids, speakers, labels):
    """Partition the dataset into train and test parts by recording index.

    A recording goes to the test part when ``str(id)`` is one of "0".."4";
    every other recording goes to the train part. Input order is preserved
    inside each part.

    Args:
        frames: Per-recording feature sequences.
        ids: Per-recording index (compared after conversion to ``str``).
        speakers: Per-recording speaker name.
        labels: Per-recording digit label.

    Returns:
        ``(X_train, X_test, y_train, y_test, spk_train, spk_test)`` as lists.
    """
    print("Splitting in train test split using the default dataset split")

    held_out_ids = ("0", "1", "2", "3", "4")

    # Each bucket holds three parallel lists: features, labels, speakers.
    train_part = ([], [], [])
    test_part = ([], [], [])

    for rec_id, feats, digit, speaker in zip(ids, frames, labels, speakers):
        bucket = test_part if str(rec_id) in held_out_ids else train_part

        for target, item in zip(bucket, (feats, digit, speaker)):
            target.append(item)

    X_train, y_train, spk_train = train_part
    X_test, y_test, spk_test = test_part

    return X_train, X_test, y_train, y_test, spk_train, spk_test


def make_scale_fn(X_train):
    """Fit a ``StandardScaler`` on ``X_train`` and return a scaling function.

    Args:
        X_train: Sequence of arrays; they are concatenated before fitting.

    Returns:
        A function ``scale(X)`` that applies the fitted scaler to every
        element of ``X`` and returns the transformed elements as a list.
    """
    # Standardize on train data: all sequences are stacked into one matrix.
    normalizer = StandardScaler()
    stacked = np.concatenate(X_train)
    normalizer.fit(stacked)

    print("Normalization will be performed using mean: {}".format(normalizer.mean_))
    print("Normalization will be performed using std: {}".format(normalizer.scale_))

    def scale(X):
        # Transform each sequence separately so sequence boundaries are kept.
        return [normalizer.transform(seq) for seq in X]

    return scale


def parser(directory, n_mfcc=6):
    """Load the recordings, extract MFCCs, split and standardize them.

    Args:
        directory: Folder that contains the ``*.wav`` recordings.
        n_mfcc: Number of MFCC coefficients per frame.

    Returns:
        ``(X_train, X_test, y_train, y_test, spk_train, spk_test)`` where the
        feature sequences in ``X_train`` and ``X_test`` have been standardized
        with statistics estimated on ``X_train`` only.
    """
    wavs, Fs, ids, y, speakers = parse_free_digits(directory)
    frames = extract_features(wavs, n_mfcc=n_mfcc, Fs=Fs)

    X_train, X_test, y_train, y_test, spk_train, spk_test = split_free_digits(
        frames, ids, speakers, y
    )

    # Fit the scaler after the split and on the training part only, so no
    # test-set statistics leak into the normalization. The returned function
    # is applied to both parts (previously its result was discarded, so the
    # features were never actually scaled).
    scale = make_scale_fn(X_train)
    X_train = scale(X_train)
    X_test = scale(X_test)

    return X_train, X_test, y_train, y_test, spk_train, spk_test

ImportError: cannot import name 'show_config' from 'numpy' (unknown location)

In [2]:
# Build the train/test feature sequences, labels and speaker lists from the
# "recordings" folder (path is relative to the notebook's working directory).
X_train, X_test, y_train, y_test, spk_train, spk_test = parser("recordings")

NameError: name 'parser' is not defined

In [50]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training utterances for validation. Stratifying on the
# labels keeps the digit proportions the same in both parts, and the fixed
# random_state makes the split identical on every run.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, stratify=y_train, test_size=0.20, random_state=42
)

In [51]:
#10
# Regroup the utterances by digit label; the per-utterance shapes differ, so
# model training/testing needs them grouped in these two layouts.
def _group_by_digit(sequences, labels, n_digits=10):
    """Group feature sequences by their digit label.

    Args:
        sequences: Per-utterance feature arrays.
        labels: Integer label (0 .. n_digits - 1) of every utterance.
        n_digits: Number of distinct labels.

    Returns:
        ``(stacked, per_digit)`` where ``per_digit[d]`` is the list of the
        individual sequences of digit ``d`` (num_sequences x seq_length x
        feature_dimension, as a Python list because lengths differ) and
        ``stacked[d]`` is all of them concatenated along axis 0. A digit
        without any sequence gets an empty array instead of a placeholder.
    """
    per_digit = [[] for _ in range(n_digits)]

    for seq, label in zip(sequences, labels):
        per_digit[label].append(np.array(seq))

    # Reference sequence used only to shape the empty placeholder arrays.
    ref = next((s for seqs in per_digit for s in seqs), None)
    if ref is None:
        empty = np.empty((0,))
    else:
        empty = np.empty((0,) + ref.shape[1:], dtype=ref.dtype)

    # One concatenate per digit instead of growing an array inside the loop.
    stacked = [
        np.concatenate(seqs, axis=0) if seqs else empty.copy()
        for seqs in per_digit
    ]

    return stacked, per_digit


digits_tr, digits_train_3 = _group_by_digit(X_tr, y_tr)
digits_val, digits_val_3 = _group_by_digit(X_val, y_val)
digits_test, digits_test_3 = _group_by_digit(X_test, y_test)

In [56]:
#10
#modified the given code
import numpy as np
import pomegranate
#from pomegranate import *

n_states = 2 # the number of HMM states
n_mixtures = 3 # the number of Gaussians
gmm = True # whether to use GMM or plain Gaussian
gmm_hmm=[]


def _left_right_transitions(n_states):
    """Return the left-to-right transition matrix for ``n_states`` (>= 1) states.

    a_ij = 0 for j < i and j > i + 1; every state except the last starts with
    0.5 for staying and 0.5 for moving on, and the last state only loops.
    """
    trans = np.zeros((n_states, n_states))

    for s in range(n_states - 1):
        trans[s, s] = 0.5
        trans[s, s + 1] = 0.5
    trans[-1, -1] = 1.0

    return trans


# The matrices depend only on n_states, so build them once. They are derived
# from n_states (instead of being hard-coded for 4 states) so they always
# match the number of emission distributions.
trans_mat = _left_right_transitions(n_states)
starts = np.zeros(n_states)
starts[0] = 1.0 # always start in the first state
ends = None # no explicit end transitions (still undecided)

for digit in range(10):
    # Data from a single digit: all frames of all its training utterances.
    # NOTE(review): cast to float64 defensively; pomegranate is reported to
    # reject float32 buffers - confirm against the installed version.
    Xi = np.asarray(digits_tr[digit], dtype=np.float64)
    dists = [] # list of probability distributions for the HMM states

    for _ in range(n_states):
        if gmm:
            dist = pomegranate.GeneralMixtureModel.from_samples(
                pomegranate.MultivariateGaussianDistribution, n_mixtures, Xi
            )
        else:
            dist = pomegranate.MultivariateGaussianDistribution.from_samples(Xi)
        dists.append(dist)

    #data = [] # your data: must be a Python list that contains: 2D lists with the sequences (so its dimension would be num_sequences x seq_length x feature_dimension)
              # But be careful, it is not a numpy array, it is a Python list (so each sequence can have different length)

    # Define the GMM-HMM
    model = pomegranate.HiddenMarkovModel.from_matrix(
        trans_mat,
        dists,
        starts,
        ends,
        state_names=['s{}'.format(s) for s in range(n_states)],
    )
    gmm_hmm.append(model)
    


ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject