# Feature Extraction
## Extracting MFCC features

In [26]:
import os
import librosa
import warnings
from tqdm import tqdm
import csv
import numpy as np

# Suppress every Python warning from this point on (no category or module
# filter is given, so nothing is exempt).
# NOTE(review): presumably meant to hide noisy library warnings while loading
# audio -- confirm, and consider narrowing the filter.
warnings.filterwarnings('ignore')

In [32]:
def get_mfcc_data(audio_path):
    """Compute MFCC features for every ``.wav`` file in a folder.

    Each matching file is loaded with ``librosa.load`` and handed to
    ``librosa.feature.mfcc``; both calls use the library defaults.

    Args:
        audio_path: Path of the folder holding the audio files. Only its
            direct entries are examined.

    Returns:
        dict mapping each processed file name (extension included) to the
        array returned by ``librosa.feature.mfcc``.
    """
    mfcc_data = {}
    for filename in tqdm(os.listdir(audio_path)):
        # Require the dot so names that merely end in the letters "wav"
        # (without being .wav files) are skipped.
        if not filename.endswith('.wav'):
            continue
        y, sr = librosa.load(os.path.join(audio_path, filename))
        mfcc_data[filename] = librosa.feature.mfcc(y=y, sr=sr)

    return mfcc_data

In [40]:
def make_mfcc_csv(mfcc_data, folder_path):
    """Write each MFCC array to its own comma-separated file.

    An entry keyed ``name.ext`` is saved as ``<folder_path>/mfcc_name.csv``.

    Args:
        mfcc_data: Mapping of audio file name to a 1-D or 2-D array, as
            accepted by ``numpy.savetxt``.
        folder_path: Folder that receives the CSV files; it must already
            exist.
    """
    for filename, mfcc in mfcc_data.items():
        # splitext removes whatever extension the key carries, instead of
        # assuming it is exactly four characters long (".wav").
        stem = os.path.splitext(filename)[0]
        csv_path = os.path.join(folder_path, 'mfcc_' + stem + '.csv')
        np.savetxt(csv_path, mfcc, delimiter=',')

In [None]:
# Expected layout: a top-level "data" folder with the clean and the noisy
# recordings kept in two separate sub-folders. The CSV files are written next
# to the audio they were computed from.
clean_audio_dir = 'data/clean_audio'
noisy_audio_dir = 'data/noisy_audio'

# Clean recordings: extract, then export.
clean_mfcc = get_mfcc_data(clean_audio_dir)
make_mfcc_csv(clean_mfcc, clean_audio_dir)

# Noisy recordings: extract, then export.
noisy_mfcc = get_mfcc_data(noisy_audio_dir)
make_mfcc_csv(noisy_mfcc, noisy_audio_dir)

## Extracting Mel Spectrogram

In [47]:
def get_mel_spec_data(audio_path):
    """Compute a mel spectrogram for every ``.wav`` file in a folder.

    Each matching file is loaded with ``librosa.load`` and handed to
    ``librosa.feature.melspectrogram``; both calls use the library defaults.

    Args:
        audio_path: Path of the folder holding the audio files. Only its
            direct entries are examined.

    Returns:
        dict mapping each processed file name (extension included) to the
        array returned by ``librosa.feature.melspectrogram``.
    """
    mel_spec_data = {}
    for filename in tqdm(os.listdir(audio_path)):
        # Require the dot so names that merely end in the letters "wav"
        # (without being .wav files) are skipped.
        if not filename.endswith('.wav'):
            continue
        y, sr = librosa.load(os.path.join(audio_path, filename))
        # Use the mel spectrogram itself; calling librosa.feature.mfcc here
        # would just reproduce the MFCC features under a different name.
        mel_spec_data[filename] = librosa.feature.melspectrogram(y=y, sr=sr)

    return mel_spec_data

In [48]:
def make_mel_spec_csv(mel_spec_data, folder_path):
    """Write each mel spectrogram array to its own comma-separated file.

    An entry keyed ``name.ext`` is saved as
    ``<folder_path>/mel_spec_name.csv``.

    Args:
        mel_spec_data: Mapping of audio file name to a 1-D or 2-D array, as
            accepted by ``numpy.savetxt``.
        folder_path: Folder that receives the CSV files; it must already
            exist.
    """
    for filename, mel_spec in mel_spec_data.items():
        # splitext removes whatever extension the key carries, instead of
        # assuming it is exactly four characters long (".wav").
        stem = os.path.splitext(filename)[0]
        csv_path = os.path.join(folder_path, 'mel_spec_' + stem + '.csv')
        np.savetxt(csv_path, mel_spec, delimiter=',')

In [None]:
# Same folder layout as the MFCC step. The results are kept under their own
# names so they are not mislabelled as MFCCs and do not overwrite the
# clean_mfcc / noisy_mfcc dictionaries.
clean_mel_spec = get_mel_spec_data('data/clean_audio')
make_mel_spec_csv(clean_mel_spec, 'data/clean_audio')

noisy_mel_spec = get_mel_spec_data('data/noisy_audio')
make_mel_spec_csv(noisy_mel_spec, 'data/noisy_audio')