In [1]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import kurtosis
from scipy.stats import skew

from sklearn.preprocessing import MinMaxScaler

In [2]:
# Root directory of the GTZAN dataset; it is handed to read_process_songs,
# which looks for one sub-folder per genre name underneath it.
gtzan_dir = '../data/genres/'

In [3]:
# Dataset parameters

# Sample count for one clip (presumably 30 seconds at 22050 samples/second -- confirm)
song_samples = 30 * 22050

# Genre folder name -> integer class label; the position in the tuple is the label
genres = {
    name: label
    for label, name in enumerate((
        'metal', 'disco', 'classical', 'hiphop', 'jazz',
        'country', 'pop', 'blues', 'reggae', 'rock',
    ))
}

In [4]:
def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """Summarise an audio signal as a flat dict of descriptor statistics.

    Frame-level descriptors are computed with librosa (spectral centroid,
    spectral roll-off, onset strength, RMS energy, zero-crossing rate and
    13 MFCCs). Each descriptor vector is then reduced to four scalars:
    mean, standard deviation, kurtosis and skewness.

    Parameters
    ----------
    y : audio time series, forwarded to librosa as ``y``.
    sr : sampling rate of ``y``.
    n_fft : frame / FFT window length in samples.
    hop_length : number of samples between successive frames.

    Returns
    -------
    dict
        Maps ``'<name>_mean'``, ``'<name>_std'``, ``'<name>_kurtosis'`` and
        ``'<name>_skew'`` to a scalar, for every ``<name>`` in ``centroid``,
        ``roloff``, ``flux``, ``rmse``, ``zcr`` and ``mfcc_0`` .. ``mfcc_12``.
    """
    # librosa renamed feature.rmse to feature.rms; use whichever this
    # installation provides so the function works on old and new releases.
    rms_fn = getattr(librosa.feature, 'rms', None)
    if rms_fn is None:
        rms_fn = librosa.feature.rmse

    # The signal is always passed by keyword (y=...): recent librosa releases
    # reject it as a positional argument for most feature functions.
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
    zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=n_fft, hop_length=hop_length)
    rmse = rms_fn(y=y, frame_length=n_fft, hop_length=hop_length)
    # hop_length is forwarded so this descriptor uses the same frame rate as
    # the others; n_fft is left at librosa's default so the values obtained
    # with the default arguments stay as they were.
    flux = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Key names (including the historical 'roloff' spelling) and their order
    # are kept as-is because they become the output column names.
    features = {
        'centroid': centroid.ravel(),
        'roloff': rolloff.ravel(),
        'flux': flux.ravel(),
        'rmse': rmse.ravel(),
        'zcr': zcr.ravel(),
    }

    # MFCC treatment: one descriptor per coefficient row. sr is passed
    # explicitly; without it librosa falls back to its own default rate.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Get statistics from the vectors
    result = {}
    for k, v in features.items():
        result['{}_mean'.format(k)] = np.mean(v)
        result['{}_std'.format(k)] = np.std(v)
        result['{}_kurtosis'.format(k)] = kurtosis(v)
        result['{}_skew'.format(k)] = skew(v)
    return result

In [5]:
def read_process_songs(src_dir, debug = True):
    """Load every audio file of every genre folder and extract its features.

    For each key of the module-level ``genres`` mapping, the folder
    ``<src_dir>/<genre>`` is walked recursively; every file found is loaded
    with ``librosa.load`` and passed to ``get_features``.

    Parameters
    ----------
    src_dir : directory containing one sub-folder per genre name. A trailing
        path separator is accepted but not required.
    debug : when true, print the path of each file before it is loaded.

    Returns
    -------
    list of dict
        One dict per audio file: the output of ``get_features`` plus a
        ``'genre'`` entry holding the integer label from ``genres``.
    """
    # Empty array of dicts with the processed features from all files
    arr_features = []

    # Read files from the folders
    for genre, label in genres.items():
        folder = os.path.join(src_dir, genre)

        for root, subdirs, files in os.walk(folder):
            # Sorting (sub-directories in place, so os.walk honours it) makes
            # the row order reproducible across file systems.
            subdirs.sort()
            for file in sorted(files):
                # Build the path from `root`, not `folder`: os.walk descends
                # into sub-directories and `file` is relative to `root`.
                file_name = os.path.join(root, file)

                # Debug process (printed first, so a failing load is identifiable)
                if debug:
                    print("Reading file: {}".format(file_name))

                # Read the audio file
                signal, sr = librosa.load(file_name)

                # Append the result to the data structure
                features = get_features(signal, sr)
                features['genre'] = label
                arr_features.append(features)
    return arr_features

In [6]:
# Extract one feature dict per audio file found under gtzan_dir.
# debug=False suppresses the per-file "Reading file" messages.
features = read_process_songs(gtzan_dir, debug=False)

In [7]:
# Turn the list of feature dicts into a table: one row per audio file,
# one column per dict key (the statistics plus the 'genre' label).
df_features = pd.DataFrame(features)

In [8]:
# Sanity check: (number of rows, number of columns) of the feature table
df_features.shape

(1000, 73)

In [9]:
# Preview the first rows of the feature table
df_features.head()

Unnamed: 0,centroid_kurtosis,centroid_mean,centroid_skew,centroid_std,flux_kurtosis,flux_mean,flux_skew,flux_std,genre,mfcc_0_kurtosis,...,rmse_skew,rmse_std,roloff_kurtosis,roloff_mean,roloff_skew,roloff_std,zcr_kurtosis,zcr_mean,zcr_skew,zcr_std
0,1.27793,3176.995746,0.851571,550.485825,4.268698,1.131944,1.844055,0.640262,0,0.492081,...,0.741844,0.020419,-0.180793,6235.702536,0.740059,1316.78258,0.427356,0.183035,0.210292,0.053053
1,7.136744,2850.16303,1.872261,450.508356,6.209002,1.104693,2.24736,0.68968,0,-0.01418,...,0.180199,0.017282,3.561296,5926.993343,1.291193,852.51147,5.19555,0.15282,1.238239,0.046267
2,3.603367,2147.049597,1.476032,394.428946,7.961967,1.156895,2.375121,0.801254,0,0.685361,...,0.176828,0.028277,1.952754,4376.752601,1.401493,842.133273,3.95875,0.128326,1.088163,0.034394
3,-0.256259,2700.584556,0.124371,411.301524,4.641896,1.165081,1.818542,0.663197,0,0.039609,...,0.302373,0.035954,-0.233782,5178.110839,0.240711,679.032203,0.094853,0.166672,0.183645,0.053765
4,1.316912,3357.385889,0.691435,548.509155,6.928385,1.263426,1.851731,0.848756,0,19.288096,...,-0.679862,0.013262,-0.009224,6810.30432,0.129295,964.532861,0.85729,0.192344,0.62512,0.05669


In [10]:
# Save the feature table as CSV; index=False leaves the DataFrame row index out of the file
df_features.to_csv('../data/GTZAN_Features.csv', index=False)