                                                            |Pre processing data|

1. Importing Libraries and defining certain constants

In [180]:
import librosa
import os
import json
import numpy as np
from scipy.signal import stft, istft
import librosa.display
import IPython.display as ipd

DATASET_PATH = 'voiceAuth'
JSON_PATH = "auth.json"
SAMPLES_TO_CONSIDER = 22050 #equivalent to 1 second



In [181]:
# filtering
def filteringData(audio):
  non_silent_intervals = librosa.effects.split(audio, top_db = 30, frame_length = 2048, hop_length = 512)
  non_silent_audio = np.concatenate([audio[start:end] for start, end in non_silent_intervals])
  return non_silent_audio 

def spectral_subtraction(noisy_audio, sr = 22050, noise_start = 0, noise_end = 1):
    f, t, Zxx = stft(noisy_audio, fs=sr, nperseg=1024)

    noise_idx = (t >= noise_start) & (t <= noise_end)
    noise_spectrum = np.mean(np.abs(Zxx[:, noise_idx]), axis=1, keepdims=True)

    magnitude = np.abs(Zxx) - noise_spectrum
    magnitude = np.maximum(magnitude, 0)

    Zxx_denoised = magnitude * np.exp(1j * np.angle(Zxx))

    _, denoised_audio = istft(Zxx_denoised, fs=sr, nperseg=1024)

    return denoised_audio

2. Preparing the dataset

In [182]:
def prepare_dataset(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    print("wtf13")
    #data dictionary
    data = {
        "labels_gender": [], #either 0 or 1 for female or male
        "labels_ID": [],
        "features": [],
        "files":[]
    }
    
    #loop through all the sub-dirs
    for f in os.listdir(dataset_path):
        f = os.path.join(dataset_path, f)
        print("wtf1")
            
        # get file path
        print("file path", f)
                
        # load audio file
        signal_org, sr = librosa.load(f)
        
        signal_org = filteringData(signal_org)
        signal_org = spectral_subtraction(signal_org)
        print(len(signal_org))
        ipd.Audio(signal_org, rate = 22500)
        
        # ensure the audio file at least 1 sec
        t = 30
        print("jj")
        
        if len(signal_org) >= t*SAMPLES_TO_CONSIDER:
            print("jj")
            # splitting the name
            filename_extracted = f.split('_')
            label_gender = filename_extracted[3].split(".")[0]
            
            # 0 for male and 1 for female
            if label_gender == 'male':
                label_gender = 0
            else:
                label_gender = 1
            labels = [label_gender,filename_extracted[2]]
        
            print(labels)
            
            for i in range(0,t):
                # enforce 1 sec. long signal
                signal = signal_org[SAMPLES_TO_CONSIDER*(i):SAMPLES_TO_CONSIDER*(i+1)]
                # extract the MFCCs
                MFCCs = np.mean(librosa.feature.mfcc(y=signal, n_mfcc=n_mfcc, hop_length=hop_length,n_fft=n_fft))
                log_mel = np.mean(librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128), axis=1)  # Log Mel-Spectrogram (128 values)
                spec_centroid = np.mean(librosa.feature.spectral_centroid(y=signal, sr=sr), axis=1)  # Spectral Centroid (1 value)
                spec_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=signal, sr=sr), axis=1)  # Spectral Bandwidth (1 value)
                spec_contrast = np.mean(librosa.feature.spectral_contrast(y=signal, sr=sr), axis=1)  # Spectral Contrast (7 values)
                # zcr = np.mean(librosa.feature.zero_crossing_rate(y=signal),axis=0 )  # Zero-Crossing Rate (1 value)
                energy = np.mean(np.square(signal))  # Energy (1 value)
                feature_vector = np.hstack([MFCCs, log_mel, spec_centroid, spec_bandwidth, spec_contrast, energy])
            
                # store data
                data["labels_gender"].append(labels[0])
                data["labels_ID"].append(labels[1])
                data["features"].append(feature_vector.T.tolist())
                data["files"].append(f)
            
            
            
    # store in json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
    

In [183]:
prepare_dataset(DATASET_PATH, JSON_PATH)

wtf13
wtf1
file path data\HW1_Q3_810103226_male.mp3
1122816
jj
jj
[0, '810103226']
wtf1
file path data\HW1_Q4_810102087_female.mp3
1483776
jj
jj
[1, '810102087']
wtf1
file path data\HW1_Q4_810103226_male.mp3
3432960
jj
jj
[0, '810103226']
wtf1
file path data\HW1_Q6_810102263_male.mp3
3921408
jj
jj
[0, '810102263']
wtf1
file path data\HW1_Q6_810103123_female.mp3
6493696
jj
jj
[1, '810103123']
wtf1
file path data\HW1_Q6_810199459_male.mp3
9097216
jj
jj
[0, '810199459']
wtf1
file path data\HW1_Q6_810199489_female.mp3
45032448
jj
jj
[1, '810199489']
wtf1
file path data\HW1_Q6_810199569_male.mp3
14795776
jj
jj
[0, '810199569']
wtf1
file path data\HW1_Q6_810199570_female.mp3
12636672
jj
jj
[1, '810199570']
wtf1
file path data\HW1_Q6_810202047_male.mp3
1388032
jj
jj
[0, '810202047']
wtf1
file path data\HW1_Q6_810600065_female.mp3
9160192
jj
jj
[1, '810600065']
wtf1
file path data\HW1_Q6_810600088_female.mp3
5325312
jj
jj
[1, '810600088']
wtf1
file path data\HW1_Q6_810600097_female.mp3
1494169