In [3]:
import os
import math
import json
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow.keras as keras
import tensorflow as tf
import pickle

In [11]:
# extract per-segment MFCC features and integer genre labels into numpy arrays
def save_mfcc(dataset_path,
              track_duration=30,
              sr=22050,
              n_mfcc=13,
              n_fft=2048,
              hop_length=512,
              num_segments=10):
    """Extract MFCC features for equal-length segments of every track in a folder.

    Every regular file directly inside ``dataset_path`` whose name starts with
    a known genre followed by a dot (``<genre>.<anything>``) is loaded at
    sample rate ``sr`` and cut into ``num_segments`` consecutive slices. An
    MFCC matrix is computed for each slice and kept only when it has exactly
    the expected number of frames, so every kept matrix has the same length.

    Despite its name, this function writes nothing to disk; it only returns
    the arrays.

    Parameters
    ----------
    dataset_path : str or path-like
        Directory containing the audio files (not searched recursively).
    track_duration : int or float
        Nominal track length in seconds, used to derive the segment size.
    sr : int
        Sample rate the audio is resampled to when loading.
    n_mfcc : int
        Number of MFCC coefficients per frame.
    n_fft : int
        FFT window size passed to librosa.
    hop_length : int
        Hop between successive frames, in samples.
    num_segments : int
        Number of slices each track is divided into.

    Returns
    -------
    mfcc_data : numpy.ndarray
        float64 array stacking one ``(frames, n_mfcc)`` matrix per kept
        segment; an empty 1-D array when nothing was kept.
    labels : numpy.ndarray
        Integer genre label for each kept segment, aligned with ``mfcc_data``.

    Notes
    -----
    Directory entries that are not regular files, or whose name prefix is not
    a known genre (e.g. ``.DS_Store``), are skipped instead of raising
    ``KeyError``. Files are processed in name order so the output row order is
    reproducible across runs and platforms.
    """
    mfcc_data = []
    labels = []
    # mapping of labels to integers
    label_dict = {
        'jazz': 0,
        'reggae': 1,
        'rock': 2,
        'blues': 3,
        'hiphop': 4,
        'country': 5,
        'metal': 6,
        'classical': 7,
        'disco': 8,
        'pop': 9
    }

    samples_per_track = sr * track_duration
    num_samples_per_segment = int(samples_per_track / num_segments)
    expected_num_mfcc_per_segment = math.ceil(num_samples_per_segment / hop_length)

    # os.scandir yields entries in arbitrary order and holds an OS handle:
    # close it deterministically and sort by name for reproducible output.
    with os.scandir(dataset_path) as entries:
        ordered_entries = sorted(entries, key=lambda entry: entry.name)

    # loop through files
    for entry in ordered_entries:
        if not entry.is_file():
            continue

        # parse file name for genre; ignore files that do not follow the
        # '<genre>.<...>' naming scheme (hidden files, READMEs, ...)
        label = label_dict.get(entry.name.split('.')[0])
        if label is None:
            continue

        # the returned sample rate is discarded so the `sr` argument is not rebound
        signal, _ = librosa.load(entry.path, sr=sr)

        # process segments and extract mfccs
        for s in range(num_segments):
            start_sample = num_samples_per_segment * s
            finish_sample = start_sample + num_samples_per_segment
            segment = signal[start_sample:finish_sample]

            # track ended before this segment: an empty slice can never give
            # the expected frame count, and later segments are empty as well
            if len(segment) == 0:
                break

            # `y` must be passed by keyword: recent librosa releases reject
            # a positional audio buffer
            mfcc = librosa.feature.mfcc(y=segment,
                                        sr=sr,
                                        n_fft=n_fft,
                                        n_mfcc=n_mfcc,
                                        hop_length=hop_length)
            mfcc = mfcc.T

            # store mfcc for segment only if it has the correct number of frames
            if len(mfcc) == expected_num_mfcc_per_segment:
                mfcc_data.append(mfcc)
                labels.append(label)

    # float64 matches what the previous list-of-Python-floats conversion
    # produced, without the costly round trip through nested lists
    mfcc_data = np.array(mfcc_data, dtype=np.float64)
    labels = np.array(labels)

    return mfcc_data, labels
    

In [12]:
# run MFCC extraction over every track in the dataset folder, using the
# default segmenting/MFCC parameters of save_mfcc
mfcc, labels = save_mfcc('../../audio_files')

In [13]:
# sanity check of the feature array dimensions
# NOTE(review): save_mfcc stacks one 2-D (frames, n_mfcc) matrix per kept
# segment, so a 3-D shape is expected here; the recorded 2-D output below
# looks stale -- re-run with a fresh kernel to confirm
mfcc.shape

(9996, 13)

In [7]:
# one label is appended per kept segment, so this length should equal mfcc.shape[0]
labels.shape


(9996,)

In [9]:
# save np arrays into an npz archive under the keys 'mfcc' and 'labels'
# (assumes the ../data directory already exists -- TODO confirm)
np.savez('../data/data.npz', mfcc=mfcc, labels=labels)