# In [1]:
import os
import re
import _pickle as pickle
import librosa
import numpy as np
import librosa
# Root directory handed to process_songs_in_dir() at the bottom of the script.
DATASET_DIR = 'dataset/geners/'

# Genre labels; a label's position in this list is its integer class id.
CLASSES = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

# Genre name -> integer class id (index into CLASSES).
CLASSES_DICT = dict(zip(CLASSES, range(len(CLASSES))))

# Reference spectrogram shape; fix_shape() pads/crops axis 1 to DEFAULT_SHAPE[1].
DEFAULT_SHAPE = (128, 647)

# In [2]:
def get_genre_from_path(song_path):
    """Extract the genre name from a song file path.

    The path must contain a file name of the form
    ``<genre>.<5 digits>.<au|mp3>`` (for example ``blues.00042.au``), where
    ``<genre>`` is a run of lowercase ASCII letters.

    Args:
        song_path: Path (or bare file name) of the song.

    Returns:
        The ``<genre>`` part of the file name, e.g. ``'blues'``.

    Raises:
        ValueError: If ``song_path`` contains no file name of that form.
    """
    # Dots are escaped so they match only a literal '.'; unescaped, '.' also
    # matches '/' and lets a directory component be mistaken for the genre.
    match = re.search(r'([a-z]+)\.\d{5}\.(au|mp3)', song_path)
    if match is None:
        raise ValueError(
            'cannot determine genre from path: {!r}'.format(song_path)
        )
    return match.group(1)

def fix_shape(spectogram, default_shape=DEFAULT_SHAPE):
    """Pad or crop a 2-D spectrogram so axis 1 has a fixed length.

    Only the second axis is adjusted; the first axis is left as it is and
    ``default_shape[0]`` is not consulted.

    Args:
        spectogram: 2-D NumPy array.
        default_shape: Target shape; only ``default_shape[1]`` is used.

    Returns:
        An array with ``default_shape[1]`` columns: zero-padded on the right
        when the input is too short, cropped (as a slice of the input) when
        it is too long, or the input object itself when it already fits.
    """
    target_cols = default_shape[1]
    current_cols = spectogram.shape[1]

    if current_cols < target_cols:
        # np.pad keeps the input dtype; concatenating with np.zeros(...) would
        # silently upcast e.g. float32 data to float64 for short inputs only.
        missing = target_cols - current_cols
        return np.pad(spectogram, ((0, 0), (0, missing)), mode='constant')
    if current_cols > target_cols:
        return spectogram[:, :target_cols]
    return spectogram


def process_song(song_path, overwrite=False):
    """Convert one audio file into a pickled ``(spectrogram, class_id)`` pair.

    Files whose extension is not ``.mp3`` or ``.au`` are ignored. For a
    supported file the audio is loaded as mono with librosa, converted to a
    128-band mel spectrogram in dB (relative to its maximum), passed through
    ``fix_shape`` and written next to the song as ``<basename>.pickle``
    together with the integer class id looked up in ``CLASSES_DICT``.

    Args:
        song_path: Path of the audio file.
        overwrite: When False (default) an already existing ``.pickle`` file
            is kept and the song is skipped; when True it is regenerated.

    Returns:
        None.

    Raises:
        KeyError: If the genre parsed from the file name is not in
            ``CLASSES_DICT``.
        Any exception raised by ``get_genre_from_path`` or by librosa while
        loading or transforming the audio is propagated unchanged.
    """
    basename, extension = os.path.splitext(song_path)
    if extension not in ('.mp3', '.au'):
        return

    pickle_path = basename + '.pickle'
    # Honour `overwrite`: skip the expensive decode when the output already exists.
    if not overwrite and os.path.exists(pickle_path):
        return

    # Resolve the label first so a badly named file fails before any audio work.
    class_id = CLASSES_DICT[get_genre_from_path(song_path)]

    y, sr = librosa.load(song_path, mono=True)
    spectogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, n_fft=2048, hop_length=1024)
    spectogram = librosa.power_to_db(spectogram, ref=np.max)
    spectogram = fix_shape(spectogram)

    with open(pickle_path, 'wb') as f:
        pickle.dump((spectogram, class_id), f)

        
    
def process_songs_in_dir(dirname, overwrite=False):
    """Recursively run ``process_song`` on everything below ``dirname``.

    Each entry of ``dirname`` that is a directory is descended into; every
    other entry is passed to ``process_song``.

    Args:
        dirname: Directory to scan.
        overwrite: Forwarded unchanged to ``process_song`` and to the
            recursive calls.
    """
    for entry_name in os.listdir(dirname):
        entry_path = os.path.join(dirname, entry_name)
        if not os.path.isdir(entry_path):
            process_song(entry_path, overwrite=overwrite)
        else:
            process_songs_in_dir(entry_path, overwrite=overwrite)

# Script driver: process every song found under DATASET_DIR, descending into
# sub-directories; `overwrite` is left at its default (False).
process_songs_in_dir(DATASET_DIR)