In [1]:
from typing import Dict, Optional, List
import numpy as np
import math

class HMMState:
    """One state of a hidden Markov model with a single Gaussian emission density.

    A state created with ``isNull=True`` carries no emission parameters; its
    log-density is the constant 0 for every observation.
    """

    def __init__(self, mean=None, covariance=None, label1=None, isNull=False):
        """Store the emission parameters and bookkeeping fields.

        :param mean: mean vector of the Gaussian (anything that broadcasts
            against the observation vector), or ``None`` for a null state.
        :param covariance: square covariance matrix of the Gaussian (d x d),
            or ``None`` for a null state.
        :param label1: free-form label attached to the state.
        :param isNull: ``True`` for a non-emitting (null) state.
        """
        # Mean vector of the emission Gaussian.
        self.mean = mean
        # Covariance matrix of the emission Gaussian; it is used as a full
        # d x d matrix by log_multivariate_gaussian_pdf_diag_cov.
        self.covariance = covariance
        # Label associated with the state.
        self.label1 = label1
        # List reserved for predecessor states; nothing in this class fills it.
        self.parents = []
        self.isNull = isNull

    def log_multivariate_gaussian_pdf_diag_cov(self, x, epsilon=1e-9):
        """Return the log-density of observation ``x`` under this state's Gaussian.

        Despite the historical name, the covariance is treated as a full
        matrix. ``epsilon`` is added to its diagonal before use.

        :param x: 1-D numpy array of length d (``x.shape[0]`` is read).
        :param epsilon: ridge added to the covariance diagonal.
        :return: the log-density as a numpy float, or ``0`` for a null state.
        """
        if self.isNull:
            return 0

        dim = x.shape[0]
        cov_safe = self.covariance + epsilon * np.eye(dim)

        # slogdet works in log space: det() of a high-dimensional or
        # small-variance matrix underflows to 0.0, which turned the whole
        # log-density into +inf.
        sign, log_det_cov = np.linalg.slogdet(cov_safe)
        if sign < 0:
            # A negative determinant means the matrix is not a valid
            # covariance; keep reporting NaN as log(det) did.
            log_det_cov = np.nan

        diff = x - self.mean
        # Solve cov * y = diff instead of forming the explicit inverse.
        quadratic_term = -0.5 * np.dot(diff, np.linalg.solve(cov_safe, diff))
        const_term = -0.5 * dim * np.log(2 * np.pi)
        return const_term - 0.5 * log_det_cov + quadratic_term


In [2]:
# Smoke test of the Gaussian log-density.
# NOTE: all three rows of `covar` are identical, so the matrix is singular; only the
# epsilon ridge added inside the method makes it invertible.
mean=[1,2,3]
covar=[[1,2,3],[1,2,3],[1,2,3]]
state=HMMState(mean,covar)
state.log_multivariate_gaussian_pdf_diag_cov(np.array([1,20,3]))

-107999991046.97011

In [3]:
import os
import numpy as np

def hmm_load_features(data_dir, silence_label="Silence"):
    """Load every recognised ``.npy`` feature file found directly in ``data_dir``.

    Two file-name patterns are accepted:

    * ``Silence.npy`` - labelled with ``silence_label``;
    * ``<int>-<anything>.npy`` (exactly one ``-``) - labelled with the integer
      in front of the dash.

    Every other file is skipped, including ``x-y.npy`` files whose prefix is
    not an integer (these used to abort the whole load with ``ValueError``).

    :param data_dir: directory to scan (not recursive).
    :param silence_label: label given to the ``Silence.npy`` sample.
    :return: list of ``{'label': ..., 'features': ndarray}`` dicts, ordered by
        file name so that repeated runs see the samples in the same order.
    """
    samples = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.npy'):
            continue

        if file_name == 'Silence.npy':
            label = silence_label
        else:
            parts = file_name.split('-')
            if len(parts) != 2:
                continue
            try:
                label = int(parts[0])
            except ValueError:
                # Prefix is not a digit label: not one of our feature files.
                continue

        features = np.load(os.path.join(data_dir, file_name))
        samples.append({'label': label, 'features': features})

    return samples



def filter_samples_by_label(samples, label):
    """
    Collect the feature entries of every sample that carries a given label.

    :param samples: List of dictionaries, each with 'label' and 'features' keys.
    :param label: The label to keep; compared with ``==``. There is no default.
    :return: List of the matching samples' 'features' values (not the sample
        dictionaries themselves), in input order.
    """
    matching_features = []
    for entry in samples:
        if entry['label'] == label:
            matching_features.append(entry["features"])
    return matching_features



In [4]:
from typing import List, Dict,Tuple
import numpy as np
import os
from sklearn.cluster import KMeans


class HMM:
    def __init__(self,label,training_folder_path='training'):
        """Create an empty model for ``label`` and load its training templates.

        :param label: label whose samples are selected from the training data.
        :param training_folder_path: directory handed to ``hmm_load_features``.
        """
        self.states: List[HMMState] = []
        data=hmm_load_features(training_folder_path)
        self.label=label
        # Feature sequences of every training sample whose label matches.
        self.templates=filter_samples_by_label(data, label)
        # No state has been added yet, so this is 0 at construction time.
        self.num_states=self.get_num_state()
        self.transitions: List[List[float]] = []
        self.state_index: Dict[HMMState, int] = {}
        self.initial_probabilities: List[float] = []  # Probability of starting in each state
        # Initialised here so print_status() works before set_observations() is called.
        self.observations: List[np.ndarray] = []
    def calculate_mean_and_covariance(self,vectors):
        """Return the sample mean and the full covariance matrix of ``vectors``.

        :param vectors: sequence of equally sized feature vectors (one per row).
        :return: ``(mean_vector, covariance_matrix)``; the covariance comes from
            ``np.cov`` with the columns of ``vectors`` as variables.
        """
        samples = np.array(vectors)
        mean_vector = samples.mean(axis=0)
        # np.cov expects one variable per row, hence the transpose.
        covariance_matrix = np.cov(samples.T)
        return mean_vector, covariance_matrix
    
    def normalize_sequence(self,seq):
        """Smooth isolated downward dips out of a sequence.

        The first element is kept. Every later element is kept as well, except
        when it is smaller than the last *output* element and the following
        input element is greater than or equal to it; such a dip is replaced by
        the last output element.

        :param seq: list of comparable values; an empty list is returned as is.
        :return: a new list of the same length.
        """
        if not seq:
            return seq

        smoothed = [seq[0]]
        last_position = len(seq) - 1

        for position, value in enumerate(seq[1:], start=1):
            carried = smoothed[-1]
            is_isolated_dip = (
                not (value >= carried)
                and position < last_position
                and seq[position + 1] >= value
            )
            smoothed.append(carried if is_isolated_dip else value)

        return smoothed
    def print_status(self):
        print("HMM Status Report")
        print("=================")
        print(f"Number of States: {len(self.states)}")
        print(f'state index: {self.state_index}')
        # Optionally, print details about each state if HMMState has identifiable attributes
        for i, state in enumerate(self.states):
            print(f"  State {i}: {state}")  # Customize based on HMMState's attributes

        print(f"Number of Observations: {len(self.observations)}")
        # Optionally, print details about observations if they're simple enough to summarize
        for i, obs in enumerate(self.observations):
            print(f"  Observation {i}: Shape {obs.shape}")

        print(f"Transition Matrix: {len(self.transitions)}x{len(self.transitions)}" if self.transitions else "Not defined")
        for i, row in enumerate(self.transitions):
            print(f"  Transition from State {i}: {row}")

        print(f"State Index Map: {len(self.state_index)} entries")
        for state, index in self.state_index.items():
            print(f"  State {state} -> Index {index}")  # Customize based on HMMState's attributes

        print(f"Initial Probabilities: {self.initial_probabilities}")

    def add_state(self, state: HMMState):
        """Adds a state to the HMM."""
        self.states.append(state)
        
        # Ensure transitions matrix is updated to reflect the new state
        for row in self.transitions:
            row.append(0.0)  # Append 0.0 for new state to existing states
        self.transitions.append([0.0 for _ in range(len(self.states))])  # Add new state with transitions
    def initialize_HMM_states(self,label,training_folder_path = 'training'):
        """Estimate initial Gaussian parameters from a uniform 5-way segmentation.

        Every template for ``label`` is cut into 5 consecutive segments; the
        k-th segments of all templates are pooled and their mean and covariance
        are computed.

        :param label: label whose templates are used.
        :param training_folder_path: directory handed to ``hmm_load_features``.
        :return: ``(mean, cov)`` - two lists with 5 entries each.
        """
        data=hmm_load_features(training_folder_path)
        templates_for_label=filter_samples_by_label(data, label)
        # segmented[t][k] is the k-th segment (a run of feature vectors) of template t.
        segmented=self.initial_segmentation(templates_for_label,5)
        # Pool the segments once; this used to be recomputed for every state.
        clusters=self.get_clusters(segmented)
        mean=[]
        cov=[]
        for i in range(5):
            m,cv=self.calculate_mean_and_covariance(clusters[i])
            mean.append(m)
            cov.append(cv)
        return mean,cov
    #def calculate_transition_probabilities(self):
        #for
    

    def initialize(self,label,training_folder_path = 'training',num_states = 6):
        """Build the left-to-right topology: ``num_states - 1`` emitting states plus a final null state.

        Emission parameters come from ``initialize_HMM_states`` (which always
        produces 5 states' worth, so only the default ``num_states = 6`` is
        consistent with it). The model always starts in state 0. Each emitting
        state gets a self-loop and a transition to its successor.

        :param label: label whose templates are used.
        :param training_folder_path: training data directory; it is now also
            forwarded to ``initialize_HMM_states`` (previously that call
            silently fell back to the default directory).
        :param num_states: total number of states including the final null state.
        """
        data=hmm_load_features(training_folder_path)
        templates_for_label=filter_samples_by_label(data, label)
        # segmented[t][k] is the k-th uniform segment of template t.
        segmented=self.initial_segmentation(templates_for_label,5)
        clustered_data=self.get_clusters(segmented)
        mean,var=self.initialize_HMM_states(label,training_folder_path)

        for i in range(num_states-1):
            new_state = HMMState(mean[i],var[i],label1=f'{label}-{i+1}')
            self.state_index[new_state]=i
            self.add_state(new_state)

        # Final, non-emitting state.
        null= HMMState(isNull=True,label1="Null")
        self.state_index[null]=num_states-1
        self.add_state(null)

        # Deterministic start: all initial mass on state 0.
        self.initial_probabilities = [1 if i==0 else 0 for i in range(num_states)]

        # Exit probability of state i = (number of templates) / (frames pooled in
        # segment i); the remainder is the self-loop. The null state keeps an
        # all-zero row.
        for i in range(num_states-1):
            self.transitions[i][i + 1] = len(templates_for_label)/len(clustered_data[i])
            self.transitions[i][i] = 1- self.transitions[i][i + 1]


    def set_observations(self, observations: List[np.ndarray]):
        """Sets the sequence of observations for the HMM."""
        self.observations = observations

    def most_probable_sequence(self, obs_seq):
        V = [{}]
        path = {}

        # Initialize base cases (t == 0)
        for state in self.states:
            initial_prob = self.initial_probabilities[self.state_index[state]]
            V[0][self.state_index[state]] = (math.log(initial_prob) if initial_prob > 0 else -math.inf) + state.log_multivariate_gaussian_pdf_diag_cov(obs_seq[0])
            path[self.state_index[state]] = [state]

        # Run Viterbi for t > 0
        for t in range(1, len(obs_seq)):
            V.append({})
            newpath = {}
            for cur_state in self.states:
                max_log_prob = -math.inf  # Initialize with negative infinity for comparison
                best_prev_state = None  # Initialize with None to find the best previous state
                for prev_state in self.states:
                    transition_prob = self.transitions[self.state_index[prev_state]][self.state_index[cur_state]]
                    log_transition_prob = math.log(transition_prob) if transition_prob > 0 else -math.inf
                    log_prob = V[t-1][self.state_index[prev_state]] + log_transition_prob + cur_state.log_multivariate_gaussian_pdf_diag_cov(obs_seq[t])
                    if log_prob > max_log_prob:
                        max_log_prob = log_prob
                        best_prev_state = prev_state
                V[t][self.state_index[cur_state]] = max_log_prob
                if best_prev_state is not None:  # Check to ensure there is a valid previous state
                    newpath[self.state_index[cur_state]] = path[self.state_index[best_prev_state]] + [cur_state]
            path = newpath

        # Find the final state with the highest probability
        max_final_log_prob = max(V[-1].values())
        final_state = [state for state, prob in V[-1].items() if prob == max_final_log_prob][0]

        return (max_final_log_prob, path[final_state])

    

    def initial_segmentation(self, templates, num_segments):
        """Cut every template into ``num_segments`` consecutive, near-equal slices.

        When the length is not divisible, the leading segments each receive one
        extra element. Templates shorter than ``num_segments`` yield empty
        trailing segments.

        :return: list (one entry per template) of lists of slices.
        """
        segmented_templates = []

        for template in templates:
            base_size, leftover = divmod(len(template), num_segments)

            pieces = []
            cursor = 0
            for segment_no in range(num_segments):
                # The first `leftover` segments absorb the remainder, one element each.
                stop = cursor + base_size + (1 if segment_no < leftover else 0)
                pieces.append(template[cursor:stop])
                cursor = stop

            segmented_templates.append(pieces)

        return segmented_templates
    def get_clusters(self,segmented_templates,num_segments=5):
        """Pool the k-th segments of all templates into one array per segment index.

        Segments of the first template are always taken, whatever their index;
        for later templates, segments with index >= ``num_segments`` are ignored.

        :param segmented_templates: ``segmented_templates[t][k]`` is segment k
            of template t.
        :return: dict mapping segment index to the concatenation (along axis 0)
            of that segment across templates.
        """
        clusters = {}

        for template_no, segments in enumerate(segmented_templates):
            for segment_no, segment in enumerate(segments):
                if template_no != 0 and segment_no >= num_segments:
                    continue
                piece = np.array(segment)
                if template_no == 0 or segment_no not in clusters:
                    clusters[segment_no] = piece
                else:
                    clusters[segment_no] = np.concatenate((clusters[segment_no], piece))

        return clusters
    def segment_based_on_indices(self,template,indices):
        """Split ``template`` at the given cut positions.

        With cuts ``[c0, c1, ..., cn]`` the result is ``template[:c0]``,
        ``template[c0:c1]``, ..., and a tail. The tail is ``template[cn:]``,
        except when ``cn == len(template)``, where it is the last element alone
        (``template[cn-1:]``). An empty ``indices`` returns an empty list.
        """
        pieces = []
        if len(indices) == 0:
            return pieces

        pieces.append(template[:indices[0]])
        for start, stop in zip(indices, indices[1:]):
            pieces.append(template[start:stop])

        last_cut = indices[-1]
        tail_start = last_cut if last_cut != len(template) else last_cut - 1
        pieces.append(template[tail_start:])

        return pieces
    def get_num_state(self):
        return len(self.states)
    def train_single_iteration(self):
        
        templates=self.templates
        segmented_templates=[]
        split_indices=[]
        score_total=[]
        for i in range(len(templates)):
            compare_template=templates[i]
            (p,s)=self.most_probable_sequence(compare_template)
            score_total.append(p)
            n=self.normalize_sequence([self.state_index[i] for i in s])
            
            indices=[i for i in range(len(n)-1) if n[i]!=n[i+1]]
            split_indices.append(indices)
            
            segmented_template=self.segment_based_on_indices(compare_template,indices)
            segmented_templates.append(segmented_template)
   

        score=np.sum(score_total)
        #print(f'len(segmented_templates): {len(segmented_templates)}')
        #print(f'len(segmented_templates[0]):{min(len(segmented_templates[i]) for i in range(10))}')
        clusted_data=self.get_clusters(segmented_templates)
        for i in range(len(clusted_data)):
            print(f'len for the {i} the cluster is{len(clusted_data[i])}')
        mean=[]
        cov=[]
        #update emission probabilities
        for i in range(5):
            m,cv=self.calculate_mean_and_covariance(clusted_data[i])
            mean.append(m)
            cov.append(cv)
        #print(mean)
        for i in range(len(self.states)-1):
            self.states[i].mean=mean[i]
            self.states[i].covariance=cov[i]
        #update transition probabilities
        
        for i in range(len(self.states)-1):
            
                #print(f'len(templates): {len(templates)}')
                #print(f'len(clusted_data[i]): {len(clusted_data[i])}')
            self.transitions[i][i + 1] = len(templates)/len(clusted_data[i])
            self.transitions[i][i] = 1- self.transitions[i][i + 1] # Probability of staying in the same state
      
        return score
    def train(self, iterations=10):
        best_s=-math.inf
        for i in range(iterations):
            s=self.train_single_iteration()
            if s>best_s:
                best_s=s
                print(f'HMM training for the {i}th iteration, training score: {s}')
            else: 
                break

In [5]:
# Scratch check: assigning to an existing dict key overwrites the previous value.
a={}
a[1]=2
a[1]=3
a

{1: 3}

In [6]:
import pickle

def save_hmm(hmm, filename):
    """
    Pickle a model to disk and report the destination on stdout.

    Parameters:
    - hmm: The object to serialise (any picklable object).
    - filename: Path of the file to create or overwrite.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(hmm, sink)
    message = f"HMM model has been saved to '{filename}'"
    print(message)
def load_hmm(filename):
    """
    Restore a pickled model from disk and report the source on stdout.

    WARNING: unpickling can execute arbitrary code; only load files you
    created yourself or otherwise trust.

    Parameters:
    - filename: Path of the pickle file to read.

    Returns:
    - The unpickled object.
    """
    with open(filename, 'rb') as source:
        restored = pickle.load(source)
    print(f"HMM model has been loaded from '{filename}'")
    return restored

In [7]:
def train_all_HMM(iterations=20):
    """Train one HMM per digit 0-9 and pickle each to ``'Digit <d> HMM'``.

    :param iterations: maximum number of training passes per model.
    """
    for digit in range(0,10):
        filename=f'Digit {digit} HMM'
        print(f"Training {filename}")
        hmm=HMM(label=digit)
        # initialize() computes the initial state parameters itself; the extra
        # initialize_HMM_states() call that used to precede it only reloaded the
        # training data and threw its result away.
        hmm.initialize(label=digit)
        hmm.train(iterations)
        save_hmm(hmm, filename)
        print(f'{filename} training finished! Moving to the next.')
def train_silence_HMM(iterations=20):
    """Placeholder for training a silence model.

    NOTE(review): unfinished - the body only binds a local label; no model is
    built, trained or saved, and ``iterations`` is unused.
    """
    label="Silence"
        
# Train and pickle all ten digit models. HMM() reads from its default 'training'
# directory, which must exist in the working directory (the recorded run failed on this).
train_all_HMM()


Training Digit 0 HMM


FileNotFoundError: [Errno 2] No such file or directory: 'training'

In [None]:
def load_all_hmm():
    """Load the ten pickled digit models ``'Digit 0 HMM'`` ... ``'Digit 9 HMM'``.

    :return: list of the loaded models, indexed by digit.
    """
    return [load_hmm(f'Digit {digit} HMM') for digit in range(10)]

def recognize(hmm1,data,digit):
    """Classify ``data`` by the best-scoring model and print the verdict.

    :param hmm1: sequence of models; the model at position ``i`` stands for
        digit ``i``. All supplied models are scored (the count is no longer
        hard-coded to 10).
    :param data: observation sequence handed to each model's
        ``most_probable_sequence``.
    :param digit: the true digit, used only for the printed comparison.
    """
    p_max=-math.inf
    # Stays None when no model produces a score above -inf; previously that
    # case crashed with an unbound local variable.
    j=None
    for i,model in enumerate(hmm1):
        p,_=model.most_probable_sequence(data)
        if p>p_max:
            p_max=p
            j=i
    print(f"The voice is recognized as {j}, the true value is {digit}")
    if j==digit:
        print("Congrats, you recognized digit right")
    else:
        print("Opps, it seems that you are wrong")



In [None]:
# Load the ten pickled digit models ('Digit 0 HMM' ... 'Digit 9 HMM') into a list indexed by digit.
hmm1=load_all_hmm()

HMM model has been loaded from 'Digit 0 HMM'
HMM model has been loaded from 'Digit 1 HMM'
HMM model has been loaded from 'Digit 2 HMM'
HMM model has been loaded from 'Digit 3 HMM'
HMM model has been loaded from 'Digit 4 HMM'
HMM model has been loaded from 'Digit 5 HMM'
HMM model has been loaded from 'Digit 6 HMM'
HMM model has been loaded from 'Digit 7 HMM'
HMM model has been loaded from 'Digit 8 HMM'
HMM model has been loaded from 'Digit 9 HMM'


In [None]:
# Only the value of the last expression (the model's label) is echoed by the notebook;
# the first state's label on the line above is evaluated but not shown.
hmm1[0].states[0].label1
hmm1[0].label

0

In [None]:
# In the recorded run this raised AttributeError: the loaded model had no `observations` attribute.
hmm1[0].print_status()

HMM Status Report
Number of States: 6
state index: {<__main__.HMMState object at 0x000001B2DDD79610>: 0, <__main__.HMMState object at 0x000001B2EC369E50>: 1, <__main__.HMMState object at 0x000001B2FEC3B6D0>: 2, <__main__.HMMState object at 0x000001B2FEC3B290>: 3, <__main__.HMMState object at 0x000001B2FEC3B950>: 4, <__main__.HMMState object at 0x000001B2FEC39150>: 5}
  State 0: <__main__.HMMState object at 0x000001B2DDD79610>
  State 1: <__main__.HMMState object at 0x000001B2EC369E50>
  State 2: <__main__.HMMState object at 0x000001B2FEC3B6D0>
  State 3: <__main__.HMMState object at 0x000001B2FEC3B290>
  State 4: <__main__.HMMState object at 0x000001B2FEC3B950>
  State 5: <__main__.HMMState object at 0x000001B2FEC39150>


AttributeError: 'HMM' object has no attribute 'observations'

In [None]:

# Recognition smoke test. NOTE: the sample comes from the 'training' directory, i.e. the same
# data the models are trained on by default, so a correct answer here says nothing about generalisation.
data=hmm_load_features('training')
digit=3
templates_for_digit=filter_samples_by_label(data, digit) 
print(len(hmm1))  
recognize(hmm1,templates_for_digit[0],digit)

10
The voice is recognized as 3, the true value is 3
Congrats, you recognized digit right


In [None]:
# Installs librosa for the feature-extraction cell further down.
# NOTE(review): the version is unpinned, and `!pip` may target a different interpreter than the
# running kernel; `%pip install librosa==<version>` would be safer.
!pip install librosa




DEPRECATION: Loading egg at c:\python311\lib\site-packages\sigkernel-0.0.1-py3.11-win-amd64.egg is deprecated. pip 23.3 will enforce this behaviour change. A possible replacement is to use pip for package installation..
DEPRECATION: Loading egg at c:\python311\lib\site-packages\tslearn-0.6.1-py3.11.egg is deprecated. pip 23.3 will enforce this behaviour change. A possible replacement is to use pip for package installation..

[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip





In [None]:
class Language_HMM:
    def __init__(self,hmms):
        self.states: List[HMMState] = []
        self.hmms=hmms
        self.observations: List[np.ndarray] = []
        self.transitions: List[List[float]] = []
        self.state_index: Dict[HMMState, int] = {}
        self.initial_probabilities: List[float] = [] 
        self.set_state_index()
    def get_all_states(self,hmms):
        """Append the composite state list to ``self.states``.

        Layout: a null state, every state of ``self.hmms[2:]``, a null state,
        then six repetitions of (every state of every model in ``self.hmms``,
        followed by a null state). The ``hmms`` argument is not used; the
        models are always read from ``self.hmms``.
        """
        # First block: only the models from index 2 onwards.
        self.states.append(HMMState(isNull=True))
        for model in self.hmms[2:]:
            self.states.extend(model.states)
        self.states.append(HMMState(isNull=True))

        # Six further blocks, each covering all models and closed by a null state.
        for _ in range(6):
            for model in self.hmms:
                self.states.extend(model.states)
            self.states.append(HMMState(isNull=True))
    def set_state_index(self):
        for i in range(len(self.states)):
            self.state_index[self.states[i]]=i
    def initialize_transition(self):
        self.transitions= np.zeros(len(self.states), (len(self.states)))
        self.set_transitions()
    def set_transitions(self):
        """Fill ``self.transitions`` from the per-model matrices and fixed fan-in/fan-out weights.

        ``self.transitions`` must already be an indexable 2-D structure large
        enough for every index computed below. All positions are hard-coded.

        NOTE(review): the index arithmetic assumes 5 states per model, whereas
        get_all_states appends every state of each model - confirm the two agree.
        """
        # Hard-coded positions of the null states; only entries [3] and [-1] are read below.
        null_states_indices=[0,41,41+51,41+51*2,41+51*3,41+51*4,41+51*5,41+51*6]
        idx=0  # never read
        # First block: copy self-loop and forward transitions of models 2..9.
        # NOTE(review): rows start at 5*i+j == 0, i.e. no offset is applied for the
        # null state listed at index 0 above - confirm.
        for i in range(0,8):
            hmm=self.hmms[2+i]
            for j in range(5):
                if j!=4:
                    self.transitions[5*i+j][5*i+j+1]=hmm.transitions[j][j+1]
                    self.transitions[5*i+j][5*i+j]=hmm.transitions[j][j]
                else:
                    # Last copied state of a model: self-loop only.
                    self.transitions[5*i+j][5*i+j]=hmm.transitions[j][j]
        # Blocks k = 1..5: same copy for all ten models, shifted by c = 42 + 51*(k-1).
        # NOTE(review): get_all_states builds six such blocks; only five are filled here.
        for k in range(1,6):
            c=42+51*(k-1)
            for i in range(0,10):
                hmm=self.hmms[i]
                for j in range(5):
                    if j!=4:
                        self.transitions[5*i+j+c][5*i+j+c+1]=hmm.transitions[j][j+1]
                        self.transitions[5*i+j+c][5*i+j+c]=hmm.transitions[j][j]
                    else:
                        self.transitions[5*i+j+c][5*i+j+c]=hmm.transitions[j][j]

 
        # Fan-out / fan-in weights. NOTE(review): the final `else` runs for EVERY
        # state index not matched above, not only for null states - confirm intent.
        for i in range(len(self.states)):
            if i==0:
                # Eight targets at weight 1/9 each.
                for m in range(8):
                    self.transitions[i][i+5*m+1]=1/9
            elif i==1:
                # Row index i-5*m-1 is 0 for m == 0 and negative for m >= 1, so it
                # wraps around to rows counted from the end of the matrix.
                for m in range(8):
                    self.transitions[i-5*m-1][i]=1/8
            elif i==null_states_indices[-1]:
                for m in range(10):
                    self.transitions[i-5*m-1][i]=1/10
            elif i==null_states_indices[3]:
                for m in range(10):
                    self.transitions[i-5*m-1][i]=1/11
                    self.transitions[i][i+5*m+1]=1/10
            else:
                # Column index i+5*m+1 grows up to i+46; presumably this can run past
                # the matrix for large i - verify against len(self.states).
                for m in range(10):
                    self.transitions[i-5*m-1][i]=1/10
                    self.transitions[i][i+5*m+1]=1/10

    def most_probable_sequence(self, obs_seq):
        V = [{}]
        path = {}

        # Initialize base cases (t == 0)
        for state in self.states:
            initial_prob = self.initial_probabilities[self.state_index[state]]
            V[0][self.state_index[state]] = (math.log(initial_prob) if initial_prob > 0 else -math.inf) + state.log_multivariate_gaussian_pdf_diag_cov(obs_seq[0])
            path[self.state_index[state]] = [state]

        # Run Viterbi for t > 0
        for t in range(1, len(obs_seq)):
            V.append({})
            newpath = {}
            for cur_state in self.states:
                max_log_prob = -math.inf  # Initialize with negative infinity for comparison
                best_prev_state = None  # Initialize with None to find the best previous state
                for prev_state in self.states:
                    transition_prob = self.transitions[self.state_index[prev_state]][self.state_index[cur_state]]
                    log_transition_prob = math.log(transition_prob) if transition_prob > 0 else -math.inf
                    log_prob = V[t-1][self.state_index[prev_state]] + log_transition_prob + cur_state.log_multivariate_gaussian_pdf_diag_cov(obs_seq[t])
                    if log_prob > max_log_prob:
                        max_log_prob = log_prob
                        best_prev_state = prev_state
                V[t][self.state_index[cur_state]] = max_log_prob
                if best_prev_state is not None:  # Check to ensure there is a valid previous state
                    newpath[self.state_index[cur_state]] = path[self.state_index[best_prev_state]] + [cur_state]
            path = newpath

        # Find the final state with the highest probability
        max_final_log_prob = max(V[-1].values())
        final_state = [state for state, prob in V[-1].items() if prob == max_final_log_prob][0]

        return (max_final_log_prob, path[final_state])
                        



        
    




In [None]:
import librosa
import numpy as np
import os

def compute_mfcc_features(file_path, n_mfcc=39):
    """Compute MFCCs with their first- and second-order deltas for one audio file.

    :param file_path: audio file handed to ``librosa.load``.
    :param n_mfcc: number of MFCC coefficients requested from librosa.
    :return: the MFCC, delta and delta-delta matrices stacked row-wise with
        ``np.vstack`` (presumably ``3 * n_mfcc`` rows by number of frames -
        confirm against the librosa documentation).
    """
    signal, sample_rate = librosa.load(file_path)
    base = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    first_delta = librosa.feature.delta(base)
    second_delta = librosa.feature.delta(base, order=2)
    return np.vstack([base, first_delta, second_delta])

def process_folder(folder_path):
    """Extract features for every ``.wav`` file found directly in ``folder_path``.

    :return: dict mapping each ``.wav`` file name to the result of
        ``compute_mfcc_features`` for that file. Other files are ignored.
    """
    return {
        file_name: compute_mfcc_features(os.path.join(folder_path, file_name))
        for file_name in os.listdir(folder_path)
        if file_name.endswith('.wav')  # only wav files are processed
    }

# The recordings folder is resolved relative to the current working directory;
# the recorded run failed with FileNotFoundError because it was not there.
folder_path = 'Project 5 first problem recordings'
features_dict = process_folder(folder_path)

FileNotFoundError: [WinError 3] 系统找不到指定的路径。: 'Project 5 first problem recordings'

{'1562.wav': array([[-2.14882019e+02, -2.25191101e+02, -2.12002579e+02, ...,
        -4.79662415e+02, -4.81805359e+02, -4.88441345e+02],
       [ 1.24905457e+02,  1.51454346e+02,  1.64693604e+02, ...,
         7.35189819e+01,  6.86013718e+01,  6.79574738e+01],
       [ 1.99001579e+01,  1.42831888e+01, -2.53910713e+01, ...,
         2.63672996e+00,  5.65973341e-01,  7.45037413e+00],
       ...,
       [-5.33646822e+00, -5.33646822e+00, -5.33646822e+00, ...,
        -1.60056382e-01, -1.60056382e-01, -1.60056382e-01],
       [-6.38999510e+00, -6.38999510e+00, -6.38999510e+00, ...,
         1.08368665e-01,  1.08368665e-01,  1.08368665e-01],
       [-4.96580887e+00, -4.96580887e+00, -4.96580887e+00, ...,
        -1.53980985e-01, -1.53980985e-01, -1.53980985e-01]], dtype=float32), '1972.wav': array([[-4.1525726e+02, -3.4787775e+02, -3.0859979e+02, ...,
        -4.7509970e+02, -4.7919733e+02, -4.8522427e+02],
       [ 1.1416258e+02,  1.5035587e+02,  1.6899896e+02, ...,
         6.3853119e+01,