# Generate MFCC for Cut Snippets

In [1]:
# --- Tunable settings -------------------------------------------------------
# `max_file_count` picks which `waveform_<x>.npy` / `labels_<x>.npy` pair is
# used; when it is falsy (e.g. None), `none_replacement` is put in the file
# names in its place.
max_file_count = None
none_replacement = 'all'

# Sampling rate handed to the MFCC computation, taken from
# `src/research/cut/cut.ipynb`.
# NOTE(review): the input arrays are read from the `new_cut` directory —
# confirm that this reference points at the right notebook.
SAMPLE_RATE = 22_050  # 22.05 kHz (default)

# Read Data

**Note:** [`src/research/new_cut/new_cut.ipynb`](../../new_cut/new_cut.ipynb) has to be executed first, because the `waveform_*.npy` and `labels_*.npy` files loaded below are read from its directory.

In [3]:
import numpy as np
import os

# Input arrays live two directory levels up, inside `new_cut`.
cut_dir = os.path.join(os.pardir, os.pardir, 'new_cut')

# Waveforms: the file name carries `max_file_count`, or `none_replacement`
# when no count is set.
waveform_file = os.path.join(cut_dir, f'waveform_{max_file_count or none_replacement}.npy')
waveforms = np.load(waveform_file)

# Labels follow the same naming scheme.
label_file = os.path.join(cut_dir, f'labels_{max_file_count or none_replacement}.npy')
labels = np.load(label_file)

In [4]:
# Sanity check: shapes of both loaded arrays, one per line.
print(waveforms.shape, labels.shape, sep='\n')

(39622, 22050)
(39622,)


# Generate MFCC

In [6]:
import librosa

def get_feature(waveform, sample_rate, n_mfcc=1):
    """Compute the MFCC feature matrix of one audio snippet.

    Thin wrapper around ``librosa.feature.mfcc``.

    Parameters
    ----------
    waveform
        Audio time series, forwarded to librosa as ``y``.
    sample_rate
        Sampling rate of ``waveform``, forwarded to librosa as ``sr``.
    n_mfcc : int, optional
        Number of MFCCs to compute. Defaults to 1, which is the value this
        function previously hard-coded, so existing calls are unaffected.

    Returns
    -------
    The value returned by ``librosa.feature.mfcc``; per the librosa
    documentation, an array whose second-to-last axis has length ``n_mfcc``.
    """
    return librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)

In [7]:
from tqdm import tqdm

# Compute one MFCC matrix per snippet, in the same order as `waveforms`.
# `tqdm` only wraps the iterable to show progress; the former `enumerate`
# index was never used, so it has been dropped.
feature_matrix = [
    get_feature(waveform, SAMPLE_RATE)
    for waveform in tqdm(waveforms, unit='snippet')
]

100%|██████████| 39622/39622 [03:01<00:00, 218.86snippet/s]


In [8]:
# Stack the per-snippet matrices into a single array.
feature_matrix = np.asarray(feature_matrix)

# Drop only the coefficient axis (axis 1), and only when it has length 1.
# A bare `.squeeze()` removes *every* length-1 axis, so a run with a single
# snippet (or snippets yielding a single frame) would silently lose the
# snippet/frame dimension as well.
if feature_matrix.ndim == 3 and feature_matrix.shape[1] == 1:
    feature_matrix = feature_matrix.squeeze(axis=1)

In [9]:
# Write the feature matrix and the labels to the current working directory,
# using the same count-based suffix as the input files.
outputs = {
    f'feature_{max_file_count or none_replacement}.npy': feature_matrix,
    f'labels_{max_file_count or none_replacement}.npy': labels,
}
for file_name, array in outputs.items():
    np.save(file_name, array)