In [1]:
import librosa
import numpy as np
import pandas as pd
import scipy

In [3]:
# df = pd.read_csv("../datasets/fma/fma_small_extracted_features.csv")
# df["Genre"]

0       fma_small
1       fma_small
2       fma_small
3       fma_small
4       fma_small
          ...    
7980    fma_small
7981    fma_small
7982    fma_small
7983    fma_small
7984    fma_small
Name: Genre, Length: 7985, dtype: object

In [4]:
def columns():
    feature_sizes = dict(chroma_stft=12, chroma_cqt=12, chroma_cens=12,
                         mfcc=12, rms=1, spectral_centroid=1, spectral_bandwidth=1, 
                         spectral_contrast=7, spectral_flatness=1, spectral_rolloff=1,
                         poly_features=3, tonnetz=6, zcr=1, dtempo=1,
                         onset_strength=1, tempogram_ratio=13, plp=1)
    single_features = ['onset_num', 'beats', 'tempo', 'dtempo_changes']
    moments = ('mean', 'std', 'median', 'min', 'max')

    columns = []
    for name, size in feature_sizes.items():
        for moment in moments:
            it = (f"{name}_{i:02d}_{moment}" for i in range(size))
            columns.extend(it)
    # columns.extend(single_features)
    columns = np.sort(np.array(columns))
    columns = np.append(columns, single_features)
    columns = np.append(columns, 'Genre')
    return columns

In [5]:
len(columns())

435

In [21]:
def count_value_changes(arr):
    changes = 0
    for i in range(1, len(arr)):
        if arr[i] != arr[i - 1]:
            changes += 1
    return changes


def calculate_features_for_single_record(file_path):
    y, sr = librosa.load(file_path)
    
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12)  #
    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=12)    #
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)               #
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12)                  #
    rms = librosa.feature.rms(y=y)                                      #

    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)   #
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr) #
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)   #
    spectral_flatness = librosa.feature.spectral_flatness(y=y)          #
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)     #

    poly_features = librosa.feature.poly_features(y=y, sr=sr, order=2)  #
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr) #
    zcr = librosa.feature.zero_crossing_rate(y)                         #

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)                #
    plp = librosa.beat.plp(onset_envelope=onset_env, sr=sr)             #
    
    dtempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr, aggregate=None) #
    tempogram_ratio = librosa.feature.tempogram_ratio(tg=librosa.feature.tempogram(y=y, sr=sr), sr=sr) #
    
    # Single features
    dtempo_changes = count_value_changes(dtempo)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    onset_num = len(librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr))

    moments = ['mean', 'std', 'median', 'min', 'max']

    def aggregate_feature(feature):
        return [np.max(feature), np.mean(feature), np.median(feature), np.min(feature), np.std(feature)]
    
    features = []

    for f in [chroma_cens, chroma_cqt, chroma_stft, dtempo, mfcc, onset_env, plp, poly_features, rms, spectral_bandwidth,
              spectral_centroid, spectral_contrast, spectral_flatness, spectral_rolloff, tempogram_ratio, 
              tonnetz, zcr]:
        if f.ndim == 1:
            features.extend(aggregate_feature(f))
        else:
            features.extend(np.hstack([aggregate_feature(f[i]) for i in range(f.shape[0])]))

    features.append(onset_num)
    features.append(len(beats))
    features.append(tempo[0])
    features.append(dtempo_changes)

    genre = file_path.split('/')[-1].split("\\")[0]
    features.append(genre)

    return features

In [7]:
res = calculate_features_for_single_record('../datasets/fma/fma_small/Hip-Hop/000002.mp3')

In [8]:
len(res)

435

# Creating a features `.csv` file

In [24]:
from tqdm import tqdm
import os

df_features = pd.DataFrame(columns=columns())
df_features

Unnamed: 0,chroma_cens_00_max,chroma_cens_00_mean,chroma_cens_00_median,chroma_cens_00_min,chroma_cens_00_std,chroma_cens_01_max,chroma_cens_01_mean,chroma_cens_01_median,chroma_cens_01_min,chroma_cens_01_std,...,zcr_00_max,zcr_00_mean,zcr_00_median,zcr_00_min,zcr_00_std,onset_num,beats,tempo,dtempo_changes,Genre


In [25]:
rootdir = '../datasets/fma/fma_small/'
all_features = []

total_files = sum([len(files) for r, d, files in os.walk(rootdir) if any(f.endswith('.mp3') for f in files)])

with tqdm(total=total_files, desc="Processing files") as pbar:
    for subdir, dirs, _ in os.walk(rootdir):
        for folder in dirs:
            folder_path = os.path.join(subdir, folder)
            for _, _, files in os.walk(folder_path):
                for file in files:
                    if file.endswith('.mp3'):
                        path = os.path.join(folder_path, file)
                        try:
                            features = calculate_features_for_single_record(path)
                            # df_features.loc[len(df_features)] = features
                            all_features.append(features)
                            # print(df_features["Genre"])

                        except Exception as e:
                            print(f"Error processing {path}: {e}.")

                        pbar.update(1)

df_features = pd.DataFrame(all_features, columns=columns())

df_features.to_csv('extracted_features.csv', index=False)

Processing files:   0%|          | 11/7994 [00:24<4:50:42,  2.18s/it]


KeyboardInterrupt: 

In [114]:
# df_features.loc[len(df_features)] = res

In [15]:
df_features.head()

Unnamed: 0,chroma_cens_00_max,chroma_cens_00_mean,chroma_cens_00_median,chroma_cens_00_min,chroma_cens_00_std,chroma_cens_01_max,chroma_cens_01_mean,chroma_cens_01_median,chroma_cens_01_min,chroma_cens_01_std,...,zcr_00_max,zcr_00_mean,zcr_00_median,zcr_00_min,zcr_00_std,onset_num,beats,tempo,dtempo_changes,Genre
0,0.188383,0.083866,0.078871,0.0,0.043706,0.250713,0.126529,0.136529,0.0,0.071916,...,0.097168,0.015582,0.013672,0.006836,0.009015,211,97,198.768029,11,Electronic\001482.mp3
1,0.514906,0.361672,0.351852,0.235476,0.066464,0.300494,0.13982,0.13646,0.0,0.076768,...,0.569336,0.182679,0.164062,0.044922,0.073516,130,42,89.102909,7,Electronic\003573.mp3
2,0.329947,0.193748,0.202888,0.017711,0.060287,0.445039,0.251467,0.241952,0.087461,0.058671,...,0.147949,0.056129,0.055176,0.016602,0.019015,123,47,95.703125,7,Electronic\004519.mp3
3,0.47949,0.2738,0.263448,0.025881,0.091197,0.607575,0.284696,0.274103,0.022886,0.134396,...,0.230957,0.064415,0.058594,0.009766,0.032143,156,76,151.999081,15,Electronic\004520.mp3
4,0.491591,0.312343,0.301532,0.053283,0.082667,0.395521,0.207771,0.219119,0.006447,0.068787,...,0.224609,0.050325,0.047363,0.005371,0.030025,130,64,129.199219,1,Electronic\004521.mp3


# Parallel processing 

In [26]:
import concurrent.futures
import os
import librosa
import pandas as pd
import numpy as np
from tqdm import tqdm


def count_value_changes(arr):
    changes = 0
    for i in range(1, len(arr)):
        if arr[i] != arr[i - 1]:
            changes += 1
    return changes


def calculate_features_for_single_record(file_path):
    y, sr = librosa.load(file_path)
    
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12)
    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=12)
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12)
    rms = librosa.feature.rms(y=y)

    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_flatness = librosa.feature.spectral_flatness(y=y)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)

    poly_features = librosa.feature.poly_features(y=y, sr=sr, order=2)
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    plp = librosa.beat.plp(onset_envelope=onset_env, sr=sr)
    
    dtempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr, aggregate=None)
    tempogram_ratio = librosa.feature.tempogram_ratio(tg=librosa.feature.tempogram(y=y, sr=sr), sr=sr)
    
    dtempo_changes = count_value_changes(dtempo)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    onset_num = len(librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr))

    moments = ['mean', 'std', 'median', 'min', 'max']

    def aggregate_feature(feature):
        return [np.max(feature), np.mean(feature), np.median(feature), np.min(feature), np.std(feature)]
    
    features = []

    for f in [chroma_cens, chroma_cqt, chroma_stft, dtempo, mfcc, onset_env, plp, poly_features, rms, spectral_bandwidth,
              spectral_centroid, spectral_contrast, spectral_flatness, spectral_rolloff, tempogram_ratio, 
              tonnetz, zcr]:
        if f.ndim == 1:
            features.extend(aggregate_feature(f))
        else:
            features.extend(np.hstack([aggregate_feature(f[i]) for i in range(f.shape[0])]))

    features.append(onset_num)
    features.append(len(beats))
    features.append(tempo[0])
    features.append(dtempo_changes)

    genre = file_path.split('/')[-1].split("\\")[0]
    features.append(genre)

    return features


def process_file(file_path):
    try:
        features = calculate_features_for_single_record(file_path)
        return features
    except Exception as e:
        print(f"Error processing {file_path}: {e}.")
        return None


def process_files_in_parallel(rootdir):
    all_features = []

    # Count total files for progress bar
    total_files = sum([len(files) for r, d, files in os.walk(rootdir) if any(f.endswith('.mp3') for f in files)])

    # Using ThreadPoolExecutor to process files in parallel
    with tqdm(total=total_files, desc="Processing files") as pbar, concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        
        for subdir, dirs, _ in os.walk(rootdir):
            for folder in dirs:
                folder_path = os.path.join(subdir, folder)
                for _, _, files in os.walk(folder_path):
                    for file in files:
                        if file.endswith('.mp3'):
                            path = os.path.join(folder_path, file)
                            futures.append(executor.submit(process_file, path))
        
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result is not None:
                all_features.append(result)
            pbar.update(1)

    return all_features

# Define root directory and process files
rootdir = '../datasets/fma/fma_small/'
df_features = pd.DataFrame(process_files_in_parallel(rootdir), columns=columns())

# Save the features to CSV
df_features.to_csv('extracted_features.csv', index=False)

Processing files:   0%|          | 39/7994 [00:30<31:21,  4.23it/s]  

Error processing ../datasets/fma/fma_small/Electronic\021842.mp3: 'float' object is not subscriptable.


Processing files:   5%|▌         | 434/7994 [05:23<40:40,  3.10it/s]  

Error processing ../datasets/fma/fma_small/Electronic\084201.mp3: 'float' object is not subscriptable.


  return pitch_tuning(
Processing files:  11%|█         | 869/7994 [11:38<56:08,  2.12it/s]  

Error processing ../datasets/fma/fma_small/Electronic\132774.mp3: 'float' object is not subscriptable.


Processing files:  15%|█▍        | 1172/7994 [16:12<27:54,  4.07it/s]  

Error processing ../datasets/fma/fma_small/Experimental\030690.mp3: 'float' object is not subscriptable.


Processing files:  19%|█▉        | 1512/7994 [20:44<23:44,  4.55it/s]  

Error processing ../datasets/fma/fma_small/Experimental\073820.mp3: 'float' object is not subscriptable.
Error processing ../datasets/fma/fma_small/Experimental\073819.mp3: 'float' object is not subscriptable.


Processing files:  19%|█▉        | 1514/7994 [20:44<27:27,  3.93it/s]

Error processing ../datasets/fma/fma_small/Experimental\073821.mp3: 'float' object is not subscriptable.


Processing files:  19%|█▉        | 1515/7994 [20:50<1:47:24,  1.01it/s]

Error processing ../datasets/fma/fma_small/Experimental\073822.mp3: 'float' object is not subscriptable.


Processing files:  53%|█████▎    | 4226/7994 [56:00<1:31:07,  1.45s/it]

Error processing ../datasets/fma/fma_small/Instrumental\107535.mp3: 'float' object is not subscriptable.


Processing files: 100%|██████████| 7994/7994 [1:45:59<00:00,  1.26it/s]  


In [27]:
df_features.shape

(7985, 435)