In [1]:
# standard library / audio / numeric dependencies
import os
import librosa
import math
import json
import numpy as np
from sklearn.model_selection import train_test_split
# (os is already imported above; the repeat is harmless)
import os
# Hide every CUDA device so TensorFlow presumably runs on the CPU only.
# This is set BEFORE tensorflow is imported — keep that order.
os.environ["CUDA_VISIBLE_DEVICES"]="-1"
import tensorflow as tf
import matplotlib.pyplot as plt
from glob import glob

In [2]:
# RAVDESS dataset location, and the JSON file the extracted MFCC features are written to
DATASET_PATH = "./data/Ravdess"
JSON_PATH = "./outputs/mfcc_form.json"

# audio settings
SAMPLE_RATE = 22050  # sampling rate handed to librosa.load
DURATION = 3  # seconds of audio expected per track (3 s, not 30 s)
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE  # number of samples expected per track

In [3]:
# Sanity check: the pattern has no wildcard, so glob returns [DATASET_PATH] when the
# path exists and an empty list otherwise (it does not list the folder's contents).
glob(DATASET_PATH)

['./data/Ravdess']

In [4]:
# this function converts the audio files into their mfcc coefficients
def save_mfcc(dataset_path,json_path,n_fft=2048,hop_length=512,n_mfcc=13,num_segments=1):
    """Extract MFCCs for every audio file below ``dataset_path`` and save them as JSON.

    Every file found in a sub-folder of ``dataset_path`` is loaded at the
    module-level ``SAMPLE_RATE`` and cut into ``num_segments`` consecutive
    segments of ``int(SAMPLES_PER_TRACK / num_segments)`` samples. A segment is
    stored only when its MFCC matrix has the expected number of frames, so all
    stored entries share one shape; other segments are silently skipped.

    Parameters
    ----------
    dataset_path : str
        Root folder of the dataset. Files lying directly in this folder are
        ignored; only files inside sub-folders are processed.
    json_path : str
        Destination file. Its parent folder is created when missing.
    n_fft : int
        FFT window size forwarded to ``librosa.feature.mfcc``.
    hop_length : int
        Hop between frames forwarded to ``librosa.feature.mfcc``.
    n_mfcc : int
        Number of MFCC coefficients per frame.
    num_segments : int
        Number of equal segments each track is divided into.

    Returns
    -------
    None
        The result is written to ``json_path`` as
        ``{"mfcc": [...], "labels": [...]}``. Each ``mfcc`` entry is a list of
        frames (frames x coefficients); each label is the third
        hyphen-separated field of the file name minus one (presumably the
        RAVDESS emotion code made zero-based -- confirm against the dataset docs).

    Raises
    ------
    ValueError, IndexError
        When a stored segment's file name has no integer third field.
    """
    # dictionary of data
    data = {
            "mfcc":[],
            "labels":[]
           }
    num_samples_per_segment = int(SAMPLES_PER_TRACK/num_segments)
    print(num_samples_per_segment)
    # NOTE(review): with librosa's default centred framing a full segment
    # presumably yields 1 + n // hop_length frames, which matches this ceil()
    # only when n is not a multiple of hop_length -- confirm before using such
    # a parameter combination, otherwise full segments would be rejected.
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment/hop_length)

    # normalise once so the root test below is a plain string comparison
    root = os.fspath(dataset_path)
    for dirpath, _dirnames, filenames in os.walk(root):
        # ensure that we are not at root level (compare by value, not identity)
        if dirpath == root:
            continue

        # process every file of this sub-folder
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            print(filepath)
            signal, sr = librosa.load(filepath, sr=SAMPLE_RATE)

            # process segments extracting mfcc and storing data
            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                finish_sample = start_sample + num_samples_per_segment

                # `y` is passed by keyword: recent librosa releases no longer
                # accept the signal as a positional argument
                mfcc = librosa.feature.mfcc(
                    y=signal[start_sample:finish_sample],
                    sr=sr,
                    n_mfcc=n_mfcc,
                    n_fft=n_fft,
                    hop_length=hop_length,
                )
                mfcc = mfcc.T

                # store mfcc for segment if it has the expected length
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    # parse the label from the bare file name so the result does
                    # not depend on the platform's path separator
                    data["labels"].append(int(filename.split('-')[2]) - 1)

    # make sure the destination folder exists before writing the result
    output_dir = os.path.dirname(json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(json_path,"w") as fp:
        json.dump(data,fp,indent=4)

In [None]:
# run the extraction over the whole dataset, treating each track as a single segment
save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=1)

66150
./data/Ravdess\Actor_01\03-01-01-01-01-01-01.wav
./data/Ravdess\Actor_01\03-01-01-01-01-02-01.wav
./data/Ravdess\Actor_01\03-01-01-01-02-01-01.wav
./data/Ravdess\Actor_01\03-01-01-01-02-02-01.wav
./data/Ravdess\Actor_01\03-01-02-01-01-01-01.wav
./data/Ravdess\Actor_01\03-01-02-01-01-02-01.wav
./data/Ravdess\Actor_01\03-01-02-01-02-01-01.wav
./data/Ravdess\Actor_01\03-01-02-01-02-02-01.wav
./data/Ravdess\Actor_01\03-01-02-02-01-01-01.wav
./data/Ravdess\Actor_01\03-01-02-02-01-02-01.wav
./data/Ravdess\Actor_01\03-01-02-02-02-01-01.wav
./data/Ravdess\Actor_01\03-01-02-02-02-02-01.wav
./data/Ravdess\Actor_01\03-01-03-01-01-01-01.wav
./data/Ravdess\Actor_01\03-01-03-01-01-02-01.wav
./data/Ravdess\Actor_01\03-01-03-01-02-01-01.wav
./data/Ravdess\Actor_01\03-01-03-01-02-02-01.wav
./data/Ravdess\Actor_01\03-01-03-02-01-01-01.wav
./data/Ravdess\Actor_01\03-01-03-02-01-02-01.wav
./data/Ravdess\Actor_01\03-01-03-02-02-01-01.wav
./data/Ravdess\Actor_01\03-01-03-02-02-02-01.wav
./data/Ravdess

./data/Ravdess\Actor_03\03-01-07-02-01-01-03.wav
./data/Ravdess\Actor_03\03-01-07-02-01-02-03.wav
./data/Ravdess\Actor_03\03-01-07-02-02-01-03.wav
./data/Ravdess\Actor_03\03-01-07-02-02-02-03.wav
./data/Ravdess\Actor_03\03-01-08-01-01-01-03.wav
./data/Ravdess\Actor_03\03-01-08-01-01-02-03.wav
./data/Ravdess\Actor_03\03-01-08-01-02-01-03.wav
./data/Ravdess\Actor_03\03-01-08-01-02-02-03.wav
./data/Ravdess\Actor_03\03-01-08-02-01-01-03.wav
./data/Ravdess\Actor_03\03-01-08-02-01-02-03.wav
./data/Ravdess\Actor_03\03-01-08-02-02-01-03.wav
./data/Ravdess\Actor_03\03-01-08-02-02-02-03.wav
./data/Ravdess\Actor_04\03-01-01-01-01-01-04.wav
./data/Ravdess\Actor_04\03-01-01-01-01-02-04.wav
./data/Ravdess\Actor_04\03-01-01-01-02-01-04.wav
./data/Ravdess\Actor_04\03-01-01-01-02-02-04.wav
./data/Ravdess\Actor_04\03-01-02-01-01-01-04.wav
./data/Ravdess\Actor_04\03-01-02-01-01-02-04.wav
./data/Ravdess\Actor_04\03-01-02-01-02-01-04.wav
./data/Ravdess\Actor_04\03-01-02-01-02-02-04.wav
./data/Ravdess\Actor

In [31]:
# reload the mfcc data saved as JSON; the returned arrays serve as input to the deep learning models
def load_data(dataset_path):
    """Read the JSON file at ``dataset_path`` and return its contents as arrays.

    Parameters
    ----------
    dataset_path : str
        Path of a JSON file holding the keys ``"mfcc"`` and ``"labels"``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, targets)`` built from the ``"mfcc"`` and ``"labels"``
        entries respectively.
    """
    with open(dataset_path,'r') as json_file:
        payload = json.load(json_file)

    # turn the plain lists into numpy arrays, features first, then labels
    features, labels = (np.array(payload[key]) for key in ("mfcc", "labels"))
    return features, labels