<h1 style = "font-size:3rem;color:darkcyan"> Music genre classification - preprocessing data </h1>


In [10]:
# importing libraries
import numpy as np
import pandas as pd
import json
import os
import librosa
import librosa.display

In [21]:
def preprocess_dataset(dataset_path, json_path, audio_duration, n_segments = 10, n_mfcc = 13, n_fft = 2048, hop_size = 512, sample_rate=22100):
    """Extract per-segment MFCCs from a folder-per-genre dataset and save them as JSON.

    Every sub-folder found under ``dataset_path`` is treated as one category.
    Each audio file inside it is loaded, cut into ``n_segments`` equally sized
    pieces, and the MFCCs of every full-length piece are stored together with
    the category label and the originating file name.

    Parameters
    ----------
    dataset_path : str or os.PathLike
        Directory whose sub-folders each hold the audio files of one category.
    json_path : str or os.PathLike
        Destination of the JSON file that receives the collected data.
    audio_duration : float
        Nominal length of every audio file, in seconds. Used only to derive
        the number of samples per segment.
    n_segments : int, default 10
        Number of segments each audio file is split into.
    n_mfcc : int, default 13
        Number of MFCC coefficients extracted per frame.
    n_fft : int, default 2048
        FFT window size forwarded to ``librosa.feature.mfcc``.
    hop_size : int, default 512
        Hop length forwarded to ``librosa.feature.mfcc``.
    sample_rate : int, default 22100
        Rate the audio is resampled to on load.

    Returns
    -------
    None
        The result is written to ``json_path`` as a dict with the keys
        ``'mappings'`` (category names; a label is an index into this list),
        ``'labels'``, ``'MFCC'`` and ``'filenames'`` (one entry per segment).
    """
    # NOTE(review): librosa's own default sampling rate is 22050 Hz, so the
    # 22100 default looks like a typo. It is kept unchanged so existing callers
    # get the same segment sizes as before -- confirm and fix separately.

    # Normalise so the root-folder comparison below also works for Path objects.
    dataset_path = os.fspath(dataset_path)

    data  = {
        'mappings' : [],  # corresponding genre
        'labels' : [],    # corresponding number
        'MFCC' : [],      # extracted mfcc
        'filenames' : []  # original filenames
     }

    n_samples_in_audio_file = int(sample_rate * audio_duration)
    n_samples_per_segment = int(np.ceil(n_samples_in_audio_file / n_segments))

    n_folders = len(os.listdir(dataset_path))

    # Walk the dataset that was passed in (not a notebook-level global).
    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # Sorting in place makes os.walk visit sub-folders alphabetically, so
        # the label <-> genre assignment is reproducible across machines.
        dirnames.sort()

        # The first entry yielded by os.walk is the dataset root itself; it
        # holds the genre folders and is not a category.
        if dirpath == dataset_path:
            continue

        # A label is the position of its category in data['mappings'], so the
        # two lists can never drift apart.
        label = len(data['mappings'])
        print(f'processing folder {label + 1} out of {n_folders}')

        category = os.path.basename(dirpath)
        data['mappings'].append(category)

        for file in sorted(filenames):
            # load audio
            file_path = os.path.join(dirpath, file)
            audio_file, _ = librosa.load(file_path, sr = sample_rate, mono=True)

            # segmentation (augmented data)
            for s in range(n_segments):
                start_sample = n_samples_per_segment * s
                end_sample = start_sample + n_samples_per_segment

                # get segment
                audio_segment = audio_file[start_sample:end_sample]

                # ignore if it contains too few samples (keeps every stored
                # MFCC matrix the same size)
                if len(audio_segment) < n_samples_per_segment:
                    continue

                # extract mfccs
                mfcc = librosa.feature.mfcc(y = audio_segment,
                                            sr = sample_rate,
                                            n_mfcc = n_mfcc,
                                            n_fft = n_fft,
                                            hop_length = hop_size)

                # store data; the matrix is transposed so each row is one
                # frame, and converted to lists because ndarrays are not
                # JSON-serialisable
                data['labels'].append(label)
                data['MFCC'].append(mfcc.T.tolist())
                data['filenames'].append(file)

    print(f'Saving dataset as {json_path}...')
    with open(json_path, 'w') as fp:
        json.dump(data, fp, indent = 4)
    print('Done saving ')

In [22]:
# Folder that holds one sub-folder of audio clips per genre.
root = '../../../Datasets/Music/Genres/archive/Data/genres_original'

# Cut every 30-second clip into 10 segments and write the features to data.json.
preprocess_dataset(
    dataset_path=root,
    json_path='data.json',
    audio_duration=30,
    n_segments=10,
)

processing folder 1 out of 10
processing folder 2 out of 10
processing folder 3 out of 10
processing folder 4 out of 10
processing folder 5 out of 10
processing folder 6 out of 10
processing folder 7 out of 10
processing folder 8 out of 10
processing folder 9 out of 10
processing folder 10 out of 10
Saving dataset as data.json...
Done saving 
