In [1]:
import os
import librosa
import numpy as np
import pandas as pd

In [78]:
def extract_features(y, sr=22050, n_fft=1024, hop_length=512):
    """Summarise an audio signal as a flat dict of scalar features.

    Frame-level descriptors (spectral centroid, onset strength, RMS,
    zero-crossing rate, spectral contrast, bandwidth, flatness, roll-off and
    20 MFCCs) are computed with librosa, flattened with ``ravel()``, and
    reduced to their mean and variance. A tempo estimate is added last.

    Parameters
    ----------
    y : audio time series handed to librosa unchanged
        (presumably the 1-D array returned by ``librosa.load`` -- confirm).
    sr : int
        Sampling rate of ``y``; forwarded to every librosa call that takes it.
    n_fft : int
        Frame / FFT size used by the spectral, RMS, ZCR and MFCC features.
    hop_length : int
        Hop between successive frames, shared by all features.

    Returns
    -------
    dict
        ``'<name>_mean'`` and ``'<name>_var'`` for each descriptor (in the
        order listed above, MFCCs as ``mfcc_0`` .. ``mfcc_19``), followed by
        ``'tempo'``.
    """
    features = {
        "centroid": librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        # "flux" is librosa's onset-strength envelope. hop_length is forwarded
        # so its frames line up with the other features; n_fft is not
        # forwarded here, so this call keeps librosa's own default for it.
        "flux": librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length).ravel(),
        "rmse": librosa.feature.rms(y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        "zcr": librosa.feature.zero_crossing_rate(y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        # All contrast bands are flattened together, so the statistics below
        # are taken across bands and frames at once. As with "flux", only
        # hop_length is forwarded; n_fft stays at librosa's default.
        "contrast": librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=hop_length).ravel(),
        "bandwidth": librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        "flatness": librosa.feature.spectral_flatness(y=y, n_fft=n_fft, hop_length=hop_length).ravel(),
        "rolloff": librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
    }

    # MFCC treatment: one feature vector per coefficient. sr must be passed
    # explicitly, otherwise librosa assumes its default rate regardless of
    # the rate the caller supplied.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=20)

    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Collapse every per-frame vector to two scalars, keeping insertion order
    # so the resulting column order is stable.
    dict_agg_features = {}
    for name, values in features.items():
        dict_agg_features['{}_mean'.format(name)] = np.mean(values)
        dict_agg_features['{}_var'.format(name)] = np.var(values)

    # tempo() returns an array; the first element is stored as the scalar.
    dict_agg_features['tempo'] = librosa.feature.rhythm.tempo(y=y, sr=sr, hop_length=hop_length)[0]

    return dict_agg_features

In [79]:
# Root directory scanned by preprocess_dataset(); each sub-directory name
# becomes the 'label' value of the tracks found inside it.
base_path = "./music_genres"
# CSV holding the extracted features: read back if it already exists,
# otherwise written once extraction has finished.
dataset_path = "./music_genre_dataset.csv"

def preprocess_dataset():
    """Build the per-track feature table and cache it as a CSV.

    If a file already exists at ``dataset_path`` it is loaded and a summary
    (``info()`` and ``head()``) is printed; nothing is recomputed.

    Otherwise every ``.au`` file in each sub-directory of ``base_path`` is
    loaded with ``librosa.load(..., duration=30)``, passed through
    ``extract_features``, and labelled with the name of its sub-directory.
    The resulting DataFrame's head and shape are printed and the frame is
    written to ``dataset_path`` without the index.

    Returns
    -------
    None
        In both branches; the function is used for its printed output and
        the CSV it writes.
    """
    if os.path.isfile(dataset_path):
        music_genre_df = pd.read_csv(dataset_path)

        # DataFrame.info() prints its own report and returns None, so it is
        # called directly; wrapping it in print() emitted a stray "None".
        music_genre_df.info()
        print(music_genre_df.head())

        return

    music_genre_features_arr = []

    # os.listdir() yields entries in arbitrary order; sorting makes the row
    # order of the generated CSV reproducible from run to run.
    for genre in sorted(os.listdir(base_path)):
        genre_path = os.path.join(base_path, genre)

        # Skip stray files sitting next to the genre folders.
        if not os.path.isdir(genre_path):
            continue

        for file_name in sorted(os.listdir(genre_path)):
            if not file_name.endswith(".au"):
                continue

            y, sr = librosa.load(os.path.join(genre_path, file_name), duration=30)

            features_dict = extract_features(y=y, sr=sr)
            features_dict['label'] = genre

            music_genre_features_arr.append(features_dict)

    # Build the frame once from the list of records rather than growing it
    # row by row.
    music_genre_df = pd.DataFrame(data=music_genre_features_arr)

    print(music_genre_df.head())
    print(music_genre_df.shape)

    music_genre_df.to_csv(dataset_path, index=False)
    

In [77]:
# Prints a summary of the cached feature CSV when it exists; otherwise
# extracts features from the audio files and writes the CSV.
preprocess_dataset()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 58 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   centroid_mean   1000 non-null   float64
 1   centroid_var    1000 non-null   float64
 2   flux_mean       1000 non-null   float64
 3   flux_var        1000 non-null   float64
 4   rmse_mean       1000 non-null   float64
 5   rmse_var        1000 non-null   float64
 6   zcr_mean        1000 non-null   float64
 7   zcr_var         1000 non-null   float64
 8   contrast_mean   1000 non-null   float64
 9   contrast_var    1000 non-null   float64
 10  bandwidth_mean  1000 non-null   float64
 11  bandwidth_var   1000 non-null   float64
 12  flatness_mean   1000 non-null   float64
 13  flatness_var    1000 non-null   float64
 14  rolloff_mean    1000 non-null   float64
 15  rolloff_var     1000 non-null   float64
 16  mfcc_0_mean     1000 non-null   float64
 17  mfcc_0_var      1000 non-null   fl