# Generate MFCC for Cut Snippets

In [1]:
# Sample rate of the cut snippets in Hz -- 22.05 kHz (default).
# Must match the value specified in `src/research/cut/cut.ipynb`.
SAMPLE_RATE = 22050

# Adjust as necessary: these two values select which input files are read
# and how the output files are named. Whenever `max_file_count` is falsy
# (e.g. None), `none_replacement` is used in the file names instead.
max_file_count = None
none_replacement = 'all'

# Read Data

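Note that [`src/research/cut/cut.ipynb`](../../cut/cut.ipynb) has to be executed first, because it is expected to produce the `cut_waveform_*.npy` and `cut_labels_*.npy` files that are loaded below!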

In [2]:
import numpy as np
np.set_printoptions(precision=4, suppress=True)
import os

In [3]:
# Directory two levels up, then into `cut`, where the input arrays are read from.
cut_dir = os.path.join(os.pardir, os.pardir, 'cut')

# Input file names carry `max_file_count`, or `none_replacement` when it is falsy.
waveform_file = os.path.join(
    cut_dir,
    f'cut_waveform_{max_file_count or none_replacement}.npy',
)
label_file = os.path.join(
    cut_dir,
    f'cut_labels_{max_file_count or none_replacement}.npy',
)

# Load both arrays into memory.
waveforms, labels = np.load(waveform_file), np.load(label_file)

In [4]:
# Sanity check: show the shape of the waveform array, then of the label array.
print(waveforms.shape, labels.shape, sep='\n')

(39704, 22050)
(39704,)


# Generate MFCC

In [5]:
import librosa
from tqdm import tqdm

In [6]:
def get_feature(waveform, sample_rate, n_mfcc=1):
    """Compute the MFCC feature of a single waveform.

    Args:
        waveform: Audio samples; forwarded to librosa as `y`.
        sample_rate: Sampling rate of `waveform`; forwarded to librosa as `sr`.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 1, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The result of `librosa.feature.mfcc` for the given input. Per the
        librosa documentation this is an array whose second-to-last axis
        has length `n_mfcc` and whose last axis indexes frames.

    Note:
        A later cell of this notebook squeezes the coefficient axis away,
        which only happens when `n_mfcc` is 1.
    """
    return librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)

In [7]:
# Compute the MFCC feature of every snippet, in the order of `waveforms`.
# tqdm only wraps the iteration to display a progress bar (one tick per snippet).
feature_matrix = [
    get_feature(waveform, SAMPLE_RATE)
    for waveform in tqdm(waveforms, unit='snippet')
]

100%|██████████| 39704/39704 [02:44<00:00, 241.23snippet/s]


In [8]:
# Stack the per-snippet features into one array (first axis = snippet).
feature_matrix = np.array(feature_matrix)

# Drop only the singleton MFCC-coefficient axis. A bare `.squeeze()` would
# also remove the snippet or frame axis whenever that axis happens to have
# length 1 (e.g. when only a single snippet was processed), silently changing
# the layout of the saved array.
# NOTE(review): assumes each feature is (n_mfcc, frames), as documented for
# `librosa.feature.mfcc` on 1-D input -- confirm if the input shape changes.
if feature_matrix.ndim == 3 and feature_matrix.shape[1] == 1:
    feature_matrix = feature_matrix.squeeze(axis=1)

In [9]:
# Persist the features first, then the labels, into the current working
# directory. The file names carry `max_file_count`, or `none_replacement`
# when it is falsy.
for out_name, out_array in (
    (f'mfcc_feature_cut_{max_file_count or none_replacement}.npy', feature_matrix),
    (f'labels_cut_{max_file_count or none_replacement}.npy', labels),
):
    np.save(out_name, out_array)