# Importing necessary libraries


In [1]:
import os
import scipy.io.wavfile
import numpy as np
from python_speech_features import mfcc
from sklearn.mixture import GaussianMixture
import pickle

# Defining the function that walks a given directory, reads every .wav file with scipy, and returns three lists: audios, freqs, filepaths

In [2]:
def read_audios(path):
    """Collect every ``.wav`` file found under *path*, recursively.

    Args:
        path: Root directory to walk with ``os.walk``.

    Returns:
        A tuple ``(audios, freqs, filepaths)`` of three parallel lists: the
        sample data and the sample rate returned by
        ``scipy.io.wavfile.read`` for each file, and the path the file was
        read from. All three lists are empty when no ``.wav`` file is found.
    """
    signals, sample_rates, wav_paths = [], [], []

    for root, _subdirs, names in os.walk(path):
        for name in names:
            # Only names ending in the lowercase ".wav" suffix are loaded.
            if not name.endswith('.wav'):
                continue

            wav_path = os.path.join(root, name)
            wav_paths.append(wav_path)

            # mmap=False reads the samples into memory, so the handle can be
            # released before the data is stored.
            with open(wav_path, "rb") as handle:
                rate, samples = scipy.io.wavfile.read(handle, mmap=False)

            signals.append(samples)
            sample_rates.append(rate)

    return signals, sample_rates, wav_paths
    

# Defining the function that extracts the MFCC features, removes the silent frames, and finally saves the remaining MFCC features into a .mfcc text file in a folder chosen according to gender


In [33]:
def _gender_from_filepath(filepath):
    """Return ``'H'``, ``'F'`` or ``None`` from the markers found in *filepath*.

    Path components are inspected from the file name outward, and the first
    component containing a capital ``H`` or ``F`` decides the result (``H``
    wins inside a single component). A path holding only one of the two
    letters gives the same answer as a plain substring test on the whole
    path; the difference is that a stray ``H`` in a far-away parent
    directory no longer overrides an ``F`` in the file name.
    """
    components = os.path.normpath(filepath).split(os.sep)
    for component in reversed(components):
        if 'H' in component:
            return 'H'
        if 'F' in component:
            return 'F'
    return None


def extractMfccs_RemoveSilence_saveMfccs(audio, freq, filepath, directory):
    """Compute MFCCs for one recording, drop low-energy frames and save them.

    Args:
        audio: Sample data of the recording, passed straight to ``mfcc``.
        freq: Sample rate of the recording, passed straight to ``mfcc``.
        filepath: Path the recording was read from. It is used to infer the
            gender folder (``H`` or ``F``) and the output file name.
        directory: Root output directory; the file is written to
            ``<directory>/<gender>/<basename>.mfcc``.

    Returns:
        None. The frame counts before and after filtering are printed.
        Nothing is written when neither ``H`` nor ``F`` occurs in *filepath*.
    """
    mfcc_features = mfcc(audio, freq, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=3000, lowfreq=0,
                         highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=False)

    # Per-frame sum of squared coefficients is used as the energy measure;
    # frames at or below 40% of the mean are treated as silence and dropped.
    energy = np.sum(mfcc_features ** 2, axis=1)
    threshold = np.mean(energy) * 0.4
    mfccs_voiced = mfcc_features[energy > threshold]

    print(f"MFCCs before removing silence: {mfcc_features.shape}")
    print(f"MFCCs after removing silence: {mfccs_voiced.shape}")

    gender = _gender_from_filepath(filepath)
    if gender is None:
        return

    gender_dir = os.path.join(directory, gender)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(gender_dir, exist_ok=True)

    stem = os.path.splitext(os.path.basename(filepath))[0]
    mfcc_file = os.path.join(gender_dir, stem + ".mfcc")
    np.savetxt(mfcc_file, mfccs_voiced, delimiter=',')



# Defining the function that trains a GMM model on each MFCC file and then saves it as a pickled .gmm file

In [4]:
def gmm(parentDir, n_components,
        output_dir=r'C:\Users\ASUS ROG STRIX\Desktop\Projet\RAL\GMM'):
    """Fit one Gaussian mixture per MFCC file and pickle each fitted model.

    Args:
        parentDir: Directory containing the ``H`` and ``F`` sub-folders of
            comma-delimited MFCC text files.
        n_components: Number of mixture components for every model.
        output_dir: Directory the pickled models are written to. It is
            created when missing. The default is the location that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        None. For each input file ``<name>.<ext>`` a model is pickled to
        ``<output_dir>/<name>.<n_components>.gmm``.
    """
    # Without this, open(..., 'wb') fails when the folder does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    # Loop over the two gender folders "H" and "F".
    for folder in ['H', 'F']:
        folder_path = os.path.join(parentDir, folder)

        # Sorted so the processing order is reproducible across runs.
        for file in sorted(os.listdir(folder_path)):
            # Load the MFCC features from the file.
            mfcc_features = np.loadtxt(os.path.join(folder_path, file), delimiter=',')

            # The local is deliberately not called "gmm", which would shadow
            # this function's own name.
            model = GaussianMixture(n_components=n_components)
            model.fit(mfcc_features)

            # Save the trained model as <name>.<n_components>.gmm.
            gmm_file_name = os.path.splitext(file)[0] + '.' + str(n_components) + '.gmm'
            gmm_file_path = os.path.join(output_dir, gmm_file_name)
            with open(gmm_file_path, 'wb') as f:
                pickle.dump(model, f)

In [31]:
# Machine-specific absolute paths: where the training recordings are read from
# and the root folder the extracted MFCC files are written under.
train_dir = r'C:\Users\ASUS ROG STRIX\Desktop\Projet\RAL\Dataset\Train'
output_dir = r'C:\Users\ASUS ROG STRIX\Desktop\Projet\RAL\MFCC\Train'

# Load every .wav file found under train_dir into three parallel lists.
audios, freqs, filepaths = read_audios(train_dir)
            

In [34]:
# Extract, silence-filter and save the MFCCs of each loaded recording; the
# function prints the frame counts before and after filtering for every file.
for audio, freq, filepath in zip(audios, freqs, filepaths):
    extractMfccs_RemoveSilence_saveMfccs(audio, freq, filepath, output_dir)

MFCCs before removing silence: (6065, 13)
MFCCs after removing silence: (6065, 13)
MFCCs before removing silence: (12667, 13)
MFCCs after removing silence: (12330, 13)
MFCCs before removing silence: (12047, 13)
MFCCs after removing silence: (12047, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5977, 13)
MFCCs before removing silence: (12029, 13)
MFCCs after removing silence: (12003, 13)
MFCCs before removing silence: (12093, 13)
MFCCs after removing silence: (12089, 13)
MFCCs before removing silence: (6055, 13)
MFCCs after removing silence: (5836, 13)
MFCCs before removing silence: (13580, 13)
MFCCs after removing silence: (13579, 13)
MFCCs before removing silence: (6807, 13)
MFCCs after removing silence: (6218, 13)
MFCCs before removing silence: (12030, 13)
MFCCs after removing silence: (11954, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5508, 13)
MFCCs before removing silence: (6012, 13)
MFCCs after removing silence: (60

In [35]:
# Train and pickle one model per MFCC file for each mixture size in turn; the
# size is part of the output file name, so the runs do not overwrite each other.
n_components = [128, 256, 512, 1024]
for n_component in n_components:
    gmm(output_dir,  n_component)