In [2]:
import os, sys
import numpy as np
import pandas as pd
import librosa
import IPython.display as ipd
from tqdm import tqdm
import matplotlib.pyplot as plt
import librosa.display
import tensorflow as tf

In [3]:
def collect_audio_files(dirname, extension='.wav'):
    """Find audio files under `dirname` and derive a label from each file name.

    Parameters
    ----------
    dirname : str
        Directory that is walked recursively.
    extension : str, optional
        File-name suffix a file must end with to be collected.

    Returns
    -------
    paths : np.ndarray
        Full paths of the matching files, sorted so the order is the same
        on every run and every machine.
    labels : np.ndarray
        For each path, the part of the file name before its first '.'
        (e.g. 'blues.00000.wav' -> 'blues'), aligned with `paths`.
    """
    found = []
    for root, dirs, files in os.walk(dirname, topdown=False):
        for filename in files:
            # Test the real suffix: a substring search would also accept
            # names such as 'notes.wav.txt'.
            if filename.endswith(extension):
                label = filename.split('.', 1)[0]
                found.append((os.path.join(root, filename), label))

    # os.walk yields entries in an OS-dependent order; sorting keeps the
    # sample order (and therefore the saved feature order) reproducible.
    found.sort()
    paths = np.array([path for path, _ in found])
    labels = np.array([label for _, label in found])
    return paths, labels


# Get the base directory
basedir = os.getcwd()
dirname = os.path.join(basedir, "Data", "genres_original")

# Save audio paths and labels
audio_paths, audio_label = collect_audio_files(dirname)
audio_paths.shape


(1000,)

In [4]:
# Extract six librosa feature arrays per audio file and build integer labels.
# NOTE: this cell rebinds `audio_label` from genre names to integer ids, so the
# cell that builds `audio_paths` / `audio_label` has to be re-run before this
# cell can be run a second time.

# Expected number of frames per clip. A clip whose features have a different
# frame count fails the array assignment below and is skipped (and reported).
N_FRAMES = 1293
# Genre names in class-id order: the index of a name is its numeric label.
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

# Size the arrays from the files actually found so features and labels stay
# aligned even when the dataset does not contain exactly 1000 clips.
n_files = len(audio_paths)

# Create empty arrays to save the features. They are allocated as float32
# up front (the dtype they are stored in) to avoid a float64 allocation plus
# a second full copy on conversion.
AllSpec = np.empty([n_files, 1025, N_FRAMES], dtype=np.float32)   # 1025 = 1 + n_fft/2 for librosa's default n_fft=2048
AllMel = np.empty([n_files, 128, N_FRAMES], dtype=np.float32)     # 128 = librosa's default n_mels
AllMfcc = np.empty([n_files, 10, N_FRAMES], dtype=np.float32)
AllZcr = np.empty([n_files, N_FRAMES], dtype=np.float32)
AllCen = np.empty([n_files, N_FRAMES], dtype=np.float32)
AllChroma = np.empty([n_files, 12, N_FRAMES], dtype=np.float32)

bad_index = []
for i, path in enumerate(tqdm(audio_paths)):
    try:
        y, sr = librosa.load(path)

        # For Spectrogram
        X = librosa.stft(y)
        AllSpec[i] = librosa.amplitude_to_db(np.abs(X))

        # Mel-Spectrogram
        M = librosa.feature.melspectrogram(y=y, sr=sr)
        AllMel[i] = librosa.power_to_db(M)

        # MFCC
        AllMfcc[i] = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=10)

        # Zero-crossing rate ([0] takes the single row of the result)
        AllZcr[i] = librosa.feature.zero_crossing_rate(y)[0]

        # Spectral centroid ([0] takes the single row of the result)
        AllCen[i] = librosa.feature.spectral_centroid(y=y, sr=sr)[0]

        # Chromagram
        AllChroma[i] = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12, n_fft=4096)

    except Exception as e:
        # Best effort: keep going, but remember the index and say why the
        # file was dropped instead of discarding the error silently.
        bad_index.append(i)
        tqdm.write(f"Skipping {path}: {e!r}")

# Delete audio and labels at the skipped indices. np.delete copies the whole
# array, so it is only done when something was actually skipped.
if bad_index:
    AllSpec = np.delete(AllSpec, bad_index, 0)
    AllMel = np.delete(AllMel, bad_index, 0)
    AllMfcc = np.delete(AllMfcc, bad_index, 0)
    AllZcr = np.delete(AllZcr, bad_index, 0)
    AllCen = np.delete(AllCen, bad_index, 0)
    AllChroma = np.delete(AllChroma, bad_index, 0)
    audio_label = np.delete(audio_label, bad_index)

# Convert labels from string to numerical
genre_to_id = {genre: idx for idx, genre in enumerate(GENRES)}
label_names = audio_label.tolist()
unknown = sorted(set(label_names) - set(genre_to_id))
if unknown:
    raise ValueError(f"Unrecognised genre labels: {unknown}")
audio_label = np.array([genre_to_id[name] for name in label_names], dtype=int)

  y, sr = librosa.load(path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
100%|██████████| 1000/1000 [04:57<00:00,  3.36it/s]


In [6]:
# One-hot encode the integer genre ids into 10 classes
y = tf.keras.utils.to_categorical(audio_label, num_classes=10)

# Bundle every feature array together with the one-hot targets and write
# them to a single compressed .npz archive in the current working directory
npz_path = os.getcwd()+"/MusicFeatures.npz"
feature_arrays = {
    "spec": AllSpec,
    "mel": AllMel,
    "mfcc": AllMfcc,
    "zcr": AllZcr,
    "cen": AllCen,
    "chroma": AllChroma,
    "target": y,
}
np.savez_compressed(npz_path, **feature_arrays)