In [29]:
import librosa
import numpy as np
import pandas as pd
import scipy

In [97]:
def columns():
    feature_sizes = dict(chroma_stft=12, chroma_cqt=12, chroma_cens=12,
                         mfcc=12, rms=1, spectral_centroid=1, spectral_bandwidth=1, 
                         spectral_contrast=7, spectral_flatness=1, spectral_rolloff=1,
                         poly_features=3, tonnetz=6, zcr=1, dtempo=1,
                         onset_strength=1, tempogram_ratio=13, plp=1)
    single_features = ['onset_num', 'beats', 'tempo', 'dtempo_changes']
    moments = ('mean', 'std', 'median', 'min', 'max')

    columns = []
    for name, size in feature_sizes.items():
        for moment in moments:
            it = (f"{name}_{i:02d}_{moment}" for i in range(size))
            columns.extend(it)
    # columns.extend(single_features)
    columns = np.sort(np.array(columns))
    columns = np.append(columns, single_features)
    columns = np.append(columns, 'Genre')
    return columns

In [100]:
len(columns())

435

In [111]:
# columns()

In [107]:
def count_value_changes(arr):
    changes = 0
    for i in range(1, len(arr)):
        if arr[i] != arr[i - 1]:
            changes += 1
    return changes


def calculate_features_for_single_record(file_path):
    y, sr = librosa.load(file_path)
    
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12)  #
    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=12)    #
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)               #
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12)                  #
    rms = librosa.feature.rms(y=y)                                      #

    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)   #
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr) #
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)   #
    spectral_flatness = librosa.feature.spectral_flatness(y=y)          #
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)     #

    poly_features = librosa.feature.poly_features(y=y, sr=sr, order=2)  #
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr) #
    zcr = librosa.feature.zero_crossing_rate(y)                         #

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)                #
    plp = librosa.beat.plp(onset_envelope=onset_env, sr=sr)             #
    
    dtempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr, aggregate=None) #
    tempogram_ratio = librosa.feature.tempogram_ratio(tg=librosa.feature.tempogram(y=y, sr=sr), sr=sr) #
    
    # Single features
    dtempo_changes = count_value_changes(dtempo)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    onset_num = len(librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr))

    moments = ['mean', 'std', 'median', 'min', 'max']

    def aggregate_feature(feature):
        return [np.max(feature), np.mean(feature), np.median(feature), np.min(feature), np.std(feature)]
    
    features = []

    for f in [chroma_cens, chroma_cqt, chroma_stft, dtempo, mfcc, onset_env, plp, poly_features, rms, spectral_bandwidth,
              spectral_centroid, spectral_contrast, spectral_flatness, spectral_rolloff, tempogram_ratio, 
              tonnetz, zcr]:
        if f.ndim == 1:
            features.extend(aggregate_feature(f))
        else:
            features.extend(np.hstack([aggregate_feature(f[i]) for i in range(f.shape[0])]))

    features.append(onset_num)
    features.append(len(beats))
    features.append(tempo[0])
    features.append(dtempo_changes)

    genre = file_path.split('/')[-2]
    features.append(genre)

    return features

In [108]:
res = calculate_features_for_single_record('../datasets/fma/fma_small/Hip-Hop/000002.mp3')

In [109]:
len(res)

435

# Creating a features `.csv` file

In [119]:
from tqdm import tqdm
import os

df_features = pd.DataFrame(columns=columns())
df_features

Unnamed: 0,chroma_cens_00_max,chroma_cens_00_mean,chroma_cens_00_median,chroma_cens_00_min,chroma_cens_00_std,chroma_cens_01_max,chroma_cens_01_mean,chroma_cens_01_median,chroma_cens_01_min,chroma_cens_01_std,...,zcr_00_max,zcr_00_mean,zcr_00_median,zcr_00_min,zcr_00_std,onset_num,beats,tempo,dtempo_changes,Genre


In [121]:
rootdir = '../datasets/fma/fma_small/'

total_files = sum([len(files) for r, d, files in os.walk(rootdir) if any(f.endswith('.mp3') for f in files)])

with tqdm(total=total_files, desc="Processing files") as pbar:
    for subdir, dirs, _ in os.walk(rootdir):
        for folder in dirs:
            folder_path = os.path.join(subdir, folder)
            for _, _, files in os.walk(folder_path):
                for file in files:
                    if file.endswith('.mp3'):
                        path = os.path.join(folder_path, file)
                        try:
                            features = calculate_features_for_single_record(path)
                            df_features.loc[len(df_features)] = features

                        except Exception as e:
                            print(f"Error processing {path}: {e}.")

                        pbar.update(1)

df_features.to_csv('extracted_features.csv', index=False)

Processing files:   0%|          | 0/7994 [00:00<?, ?it/s]

Processing files:   0%|          | 37/7994 [01:44<6:10:21,  2.79s/it]

Error processing ../datasets/fma/fma_small/Electronic\021842.mp3: 'float' object is not subscriptable.


Processing files:   5%|▌         | 429/7994 [19:54<5:30:02,  2.62s/it]

Error processing ../datasets/fma/fma_small/Electronic\084201.mp3: 'float' object is not subscriptable.


  return pitch_tuning(
Processing files:  11%|█         | 871/7994 [39:37<5:25:11,  2.74s/it]

Error processing ../datasets/fma/fma_small/Electronic\132774.mp3: 'float' object is not subscriptable.


Processing files:  15%|█▍        | 1171/7994 [51:36<4:14:33,  2.24s/it]

Error processing ../datasets/fma/fma_small/Experimental\030690.mp3: 'float' object is not subscriptable.


  return pitch_tuning(
Processing files:  19%|█▉        | 1512/7994 [1:04:55<4:10:42,  2.32s/it]

Error processing ../datasets/fma/fma_small/Experimental\073819.mp3: 'float' object is not subscriptable.


Processing files:  19%|█▉        | 1513/7994 [1:04:57<4:09:04,  2.31s/it]

Error processing ../datasets/fma/fma_small/Experimental\073820.mp3: 'float' object is not subscriptable.


Processing files:  19%|█▉        | 1514/7994 [1:05:00<4:08:56,  2.31s/it]

Error processing ../datasets/fma/fma_small/Experimental\073821.mp3: 'float' object is not subscriptable.


Processing files:  19%|█▉        | 1515/7994 [1:05:02<4:13:37,  2.35s/it]

Error processing ../datasets/fma/fma_small/Experimental\073822.mp3: 'float' object is not subscriptable.


  return pitch_tuning(
Processing files:  53%|█████▎    | 4236/7994 [2:59:11<2:08:09,  2.05s/it]

Error processing ../datasets/fma/fma_small/Instrumental\107535.mp3: 'float' object is not subscriptable.


  return pitch_tuning(
Processing files: 100%|██████████| 7994/7994 [5:33:14<00:00,  2.50s/it]  


In [114]:
df_features.loc[len(df_features)] = res

In [115]:
df_features

Unnamed: 0,chroma_cens_00_max,chroma_cens_00_mean,chroma_cens_00_median,chroma_cens_00_min,chroma_cens_00_std,chroma_cens_01_max,chroma_cens_01_mean,chroma_cens_01_median,chroma_cens_01_min,chroma_cens_01_std,...,zcr_00_max,zcr_00_mean,zcr_00_median,zcr_00_min,zcr_00_std,onset_num,beats,tempo,dtempo_changes,Genre
0,0.73768,0.492565,0.484671,0.314254,0.062149,0.479964,0.30733,0.301641,0.179593,0.049467,...,0.614746,0.164406,0.142578,0.030273,0.093938,151,82,161.499023,6,Hip-Hop
