# Mass Feature Grabbing

Within the librosa library, there are a large number of features that can be extracted from each song.

## 1 - Do the imports

In [43]:
import os
import numpy as np # For numerical operations
import matplotlib.pyplot as plt # For plotting
import gc # For garbage collection
import librosa # For audio processing

import sys

!{sys.executable} -m pip install tensorflow
!{sys.executable} -m pip install tensorflow-hub
!{sys.executable} -m pip install tensorflow_hub
!{sys.executable} -m pip install opensmile

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
Collecting opensmile
  Downloading opensmile-2.5.1-py3-none-macosx_11_0_arm64.whl (706 kB)
[K     |████████████████████████████████| 706 kB 7.5 MB/s eta 0:00:01
[?25hCollecting audinterface>=0.7.0
  Downloading audinterface-1.3.1-py3-none-any.whl (81 kB)
[K     |████████████████████

## 2 - Setup the basepath and the genres

In [44]:
BASEPATH = os.path.join("./Data", "genres_original")
GENRES = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

## 3 - Grabbing all the features (30 seconds)

In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import opensmile
import gc  # For garbage collection

allSongs = []

import tensorflow as tf
import tensorflow_hub as hub

yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(yamnet_model_handle)

def get_yamnet_embedding(y, sr):
    import librosa
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        sr = 16000
    waveform = y.astype('float32')
    scores, embeddings, spectrogram = yamnet_model(waveform)
    embedding_vector = tf.reduce_mean(embeddings, axis=0).numpy()
    return embedding_vector

for genre in GENRES:
    genre_path = os.path.join(BASEPATH, genre)

    for filename in os.listdir(genre_path):
        if not filename.endswith(".wav"):
            continue

        file_path = os.path.join(genre_path, filename)
        y, sr = librosa.load(file_path, sr=22050)

        # Ensure the audio is exactly 30 seconds
        y = librosa.util.fix_length(y, size=30 * sr)

        features = {}

        # Unique filename for each segment
        specificFilename = filename.replace(".wav", f"_{segment_idx}.wav")
        features["filename"] = specificFilename
        features["label"] = genre
        features["song_id"] = f"{genre}_{filename.split('.')[0]}"

        y_segment = segment  # Use the current segment for feature extraction

        # Compute chroma_stft
        chroma = librosa.feature.chroma_stft(y=y_segment, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)
        chroma_var = np.var(chroma, axis=1)
        for i in range(len(chroma_mean)):
            features[f"chroma_stft_mean_{i}"] = chroma_mean[i]
            features[f"chroma_stft_var_{i}"] = chroma_var[i]

        # Spectral centroid
        centroid = librosa.feature.spectral_centroid(y=y_segment, sr=sr)
        features["centroid_mean"] = np.mean(centroid)
        features["centroid_var"] = np.var(centroid)

        # Spectral bandwidth
        bandwidth = librosa.feature.spectral_bandwidth(y=y_segment, sr=sr)
        features["bandwidth_mean"] = np.mean(bandwidth)
        features["bandwidth_var"] = np.var(bandwidth)

        # Spectral rolloff
        rolloff = librosa.feature.spectral_rolloff(y=y_segment, sr=sr)
        features["rolloff_mean"] = np.mean(rolloff)
        features["rolloff_var"] = np.var(rolloff)

        # Zero Crossing Rate
        zcr = librosa.feature.zero_crossing_rate(y=y_segment)
        features["zcr_mean"] = np.mean(zcr)
        features["zcr_var"] = np.var(zcr)

        # Harmonic and Percussive components
        y_harmonic, y_percussive = librosa.effects.hpss(y_segment)

        harmony = librosa.feature.chroma_stft(y=y_harmonic, sr=sr)
        harmony_mean = np.mean(harmony, axis=1)
        harmony_var = np.var(harmony, axis=1)
        for i in range(len(harmony_mean)):
            features[f"harmony_mean_{i}"] = harmony_mean[i]
            features[f"harmony_var_{i}"] = harmony_var[i]

        percussive = librosa.feature.chroma_stft(y=y_percussive, sr=sr)
        percussive_mean = np.mean(percussive, axis=1)
        percussive_var = np.var(percussive, axis=1)
        for i in range(len(percussive_mean)):
            features[f"percussive_mean_{i}"] = percussive_mean[i]
            features[f"percussive_var_{i}"] = percussive_var[i]

        # MFCCs and deltas
        mfccs = librosa.feature.mfcc(y=y_segment, sr=sr, n_mfcc=13)
        delta_mfccs = librosa.feature.delta(mfccs, order=1)
        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
        for i in range(mfccs.shape[0]):
            features[f"mfcc_{i}_mean"] = np.mean(mfccs[i])
            features[f"mfcc_{i}_var"] = np.var(mfccs[i])
            features[f"delta_mfcc_{i}_mean"] = np.mean(delta_mfccs[i])
            features[f"delta_mfcc_{i}_var"] = np.var(delta_mfccs[i])
            features[f"delta2_mfcc_{i}_mean"] = np.mean(delta2_mfccs[i])
            features[f"delta2_mfcc_{i}_var"] = np.var(delta2_mfccs[i])

        # Spectral contrast
        spectral_contrast = librosa.feature.spectral_contrast(y=y_segment, sr=sr)
        for i in range(spectral_contrast.shape[0]):
            features[f"spectral_contrast_{i}_mean"] = np.mean(spectral_contrast[i])
            features[f"spectral_contrast_{i}_var"] = np.var(spectral_contrast[i])

        # Tonnetz
        tonnetz = librosa.feature.tonnetz(y=y_segment, sr=sr)
        for i in range(tonnetz.shape[0]):
            features[f"tonnetz_{i}_mean"] = np.mean(tonnetz[i])
            features[f"tonnetz_{i}_var"] = np.var(tonnetz[i])

        # Chroma CQT and CENS
        chroma_cqt = librosa.feature.chroma_cqt(y=y_segment, sr=sr)
        chroma_cqt_mean = np.mean(chroma_cqt, axis=1)
        chroma_cqt_var = np.var(chroma_cqt, axis=1)
        for i in range(len(chroma_cqt_mean)):
            features[f"chroma_cqt_mean_{i}"] = chroma_cqt_mean[i]
            features[f"chroma_cqt_var_{i}"] = chroma_cqt_var[i]

        chroma_cens = librosa.feature.chroma_cens(y=y_segment, sr=sr)
        chroma_cens_mean = np.mean(chroma_cens, axis=1)
        chroma_cens_var = np.var(chroma_cens, axis=1)
        for i in range(len(chroma_cens_mean)):
            features[f"chroma_cens_mean_{i}"] = chroma_cens_mean[i]
            features[f"chroma_cens_var_{i}"] = chroma_cens_var[i]

        # Chroma VQT
        chroma_vqt = librosa.feature.chroma_vqt(y=y_segment, sr=sr, intervals='equal')
        chroma_vqt_mean = np.mean(chroma_vqt, axis=1)
        chroma_vqt_var = np.var(chroma_vqt, axis=1)
        for i in range(len(chroma_vqt_mean)):
            features[f"chroma_vqt_mean_{i}"] = chroma_vqt_mean[i]
            features[f"chroma_vqt_var_{i}"] = chroma_vqt_var[i]

        # Mel spectrogram
        mel_spectrogram = librosa.feature.melspectrogram(y=y_segment, sr=sr)
        mel_spectrogram_mean = np.mean(mel_spectrogram, axis=1)
        mel_spectrogram_var = np.var(mel_spectrogram, axis=1)
        for i in range(len(mel_spectrogram_mean)):
            features[f"mel_spectrogram_mean_{i}"] = mel_spectrogram_mean[i]
            features[f"mel_spectrogram_var_{i}"] = mel_spectrogram_var[i]

        # Spectral flatness
        spectral_flatness = librosa.feature.spectral_flatness(y=y_segment)
        features["spectral_flatness_mean"] = np.mean(spectral_flatness)
        features["spectral_flatness_var"] = np.var(spectral_flatness)

        # Polynomial features
        poly_features = librosa.feature.poly_features(y=y_segment, sr=sr)
        poly_features_mean = np.mean(poly_features, axis=1)
        poly_features_var = np.var(poly_features, axis=1)
        for i in range(len(poly_features_mean)):
            features[f"poly_features_mean_{i}"] = poly_features_mean[i]
            features[f"poly_features_var_{i}"] = poly_features_var[i]

        # Tempo and tempogram
        tempo, _ = librosa.beat.beat_track(y=y_segment, sr=sr)
        features["tempo"] = tempo

        tempogram = librosa.feature.tempogram(y=y_segment, sr=sr)
        tempogram_mean = np.mean(tempogram, axis=1)
        tempogram_var = np.var(tempogram, axis=1)
        for i in range(len(tempogram_mean)):
            features[f"tempogram_mean_{i}"] = tempogram_mean[i]
            features[f"tempogram_var_{i}"] = tempogram_var[i]

        fourier_tempogram = librosa.feature.fourier_tempogram(y=y_segment, sr=sr)
        fourier_tempogram_mean = np.mean(fourier_tempogram, axis=1)
        fourier_tempogram_var = np.var(fourier_tempogram, axis=1)
        for i in range(len(fourier_tempogram_mean)):
            features[f"fourier_tempogram_mean_{i}"] = fourier_tempogram_mean[i]
            features[f"fourier_tempogram_var_{i}"] = fourier_tempogram_var[i]

        tempogram_ratio = librosa.feature.tempogram(y=y_segment, sr=sr)
        tempogram_ratio_mean = np.mean(tempogram_ratio, axis=1)
        tempogram_ratio_var = np.var(tempogram_ratio, axis=1)
        for i in range(len(tempogram_ratio_mean)):
            features[f"tempogram_ratio_mean_{i}"] = tempogram_ratio_mean[i]
            features[f"tempogram_ratio_var_{i}"] = tempogram_ratio_var[i]

        # Stack memory
        stack_memory = librosa.feature.stack_memory(y_segment)
        stack_memory_mean = np.mean(stack_memory, axis=1)
        stack_memory_var = np.var(stack_memory, axis=1)
        for i in range(len(stack_memory_mean)):
            features[f"stack_memory_mean_{i}"] = stack_memory_mean[i]
            features[f"stack_memory_var_{i}"] = stack_memory_var[i]
            
        yamnet_emb = get_yamnet_embedding(y_segment, sr)
        for i, val in enumerate(yamnet_emb):
            features[f"yamnet_emb_{i}"] = val
            
        # smile = opensmile.Smile(
        #     feature_set=opensmile.FeatureSet.eGeMAPSv02,
        #     feature_level=opensmile.FeatureLevel.LowLevelDescriptors
        # )
        
        # smile_features = smile.process_signal(y_segment, sr)
        # for key, value in smile_features.items():
        #     if isinstance(value, np.ndarray):
        #         features[key] = value.mean()
                
        # features["opemsmile_mean"] = np.mean(smile_features)
        # features["opemsmile_var"] = np.var(smile_features)

        print(len(features))
        allSongs.append(features)
        print(f"Processed {filename} in genre {genre}")

        gc.collect()  # Run garbage collection to free memory

# Save to CSV
df = pd.DataFrame(allSongs)
os.makedirs(BASEPATH, exist_ok=True)
df.to_csv(os.path.join(BASEPATH, "all_30_second_features_full.csv"), index=False)
print("Feature extraction complete and saved.")

  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00093.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00087.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00050.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00044.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00078.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00079.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00045.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00051.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00086.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00092.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00084.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00090.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00047.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00053.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00052.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00046.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00091.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00085.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00081.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00095.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00042.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00056.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00057.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00043.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00094.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00080.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00096.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00082.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00069.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00055.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00041.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00040.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00054.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00068.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00083.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00097.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00033.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00027.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00026.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00032.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00024.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00030.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00018.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00019.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00031.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00025.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00009.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00021.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00035.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00034.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00020.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00008.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00036.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00022.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00023.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00037.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00012.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00006.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00007.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00013.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00005.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00011.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00039.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00038.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00010.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00004.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00028.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00000.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00014.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00015.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00001.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00029.wav in genre blues


  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


3474
Processed blues.00017.wav in genre blues


KeyboardInterrupt: 

## 4 - Grabbing all the features (3 seconds)

In [None]:
import os
import numpy as np
import pandas as pd
import librosa

allSongs = []

import tensorflow as tf
import tensorflow_hub as hub

yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(yamnet_model_handle)

def get_yamnet_embedding(y, sr):
    import librosa
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        sr = 16000
    waveform = y.astype('float32')
    scores, embeddings, spectrogram = yamnet_model(waveform)
    embedding_vector = tf.reduce_mean(embeddings, axis=0).numpy()
    return embedding_vector

for genre in GENRES:
    genre_path = os.path.join(BASEPATH, genre)
    for filename in os.listdir(genre_path):
        if not filename.endswith(".wav"):
            continue

        file_path = os.path.join(genre_path, filename)
        y, sr = librosa.load(file_path, sr=22050)

        # Ensure the audio is exactly 30 seconds
        y = librosa.util.fix_length(y, size=30 * sr)

        # Split into 10 segments (3 seconds each)
        segments = np.array_split(y, 10)

        for segment_idx, segment in enumerate(segments, start=1):
            features = {}

            # Unique filename for each segment
            specificFilename = filename.replace(".wav", f"_{segment_idx}.wav")
            features["filename"] = specificFilename
            features["label"] = genre
            features["song_id"] = f"{genre}_{filename.split('.')[0]}"

            y_segment = segment  # Use the current segment for feature extraction

            # Compute chroma_stft
            chroma = librosa.feature.chroma_stft(y=y_segment, sr=sr)
            chroma_mean = np.mean(chroma, axis=1)
            chroma_var = np.var(chroma, axis=1)
            for i in range(len(chroma_mean)):
                features[f"chroma_stft_mean_{i}"] = chroma_mean[i]
                features[f"chroma_stft_var_{i}"] = chroma_var[i]

            # Spectral centroid
            centroid = librosa.feature.spectral_centroid(y=y_segment, sr=sr)
            features["centroid_mean"] = np.mean(centroid)
            features["centroid_var"] = np.var(centroid)

            # Spectral bandwidth
            bandwidth = librosa.feature.spectral_bandwidth(y=y_segment, sr=sr)
            features["bandwidth_mean"] = np.mean(bandwidth)
            features["bandwidth_var"] = np.var(bandwidth)

            # Spectral rolloff
            rolloff = librosa.feature.spectral_rolloff(y=y_segment, sr=sr)
            features["rolloff_mean"] = np.mean(rolloff)
            features["rolloff_var"] = np.var(rolloff)

            # Zero Crossing Rate
            zcr = librosa.feature.zero_crossing_rate(y=y_segment)
            features["zcr_mean"] = np.mean(zcr)
            features["zcr_var"] = np.var(zcr)

            # Harmonic and Percussive components
            y_harmonic, y_percussive = librosa.effects.hpss(y_segment)

            harmony = librosa.feature.chroma_stft(y=y_harmonic, sr=sr)
            harmony_mean = np.mean(harmony, axis=1)
            harmony_var = np.var(harmony, axis=1)
            for i in range(len(harmony_mean)):
                features[f"harmony_mean_{i}"] = harmony_mean[i]
                features[f"harmony_var_{i}"] = harmony_var[i]

            percussive = librosa.feature.chroma_stft(y=y_percussive, sr=sr)
            percussive_mean = np.mean(percussive, axis=1)
            percussive_var = np.var(percussive, axis=1)
            for i in range(len(percussive_mean)):
                features[f"percussive_mean_{i}"] = percussive_mean[i]
                features[f"percussive_var_{i}"] = percussive_var[i]

            # MFCCs and deltas
            mfccs = librosa.feature.mfcc(y=y_segment, sr=sr, n_mfcc=13)
            delta_mfccs = librosa.feature.delta(mfccs, order=1)
            delta2_mfccs = librosa.feature.delta(mfccs, order=2)
            for i in range(mfccs.shape[0]):
                features[f"mfcc_{i}_mean"] = np.mean(mfccs[i])
                features[f"mfcc_{i}_var"] = np.var(mfccs[i])
                features[f"delta_mfcc_{i}_mean"] = np.mean(delta_mfccs[i])
                features[f"delta_mfcc_{i}_var"] = np.var(delta_mfccs[i])
                features[f"delta2_mfcc_{i}_mean"] = np.mean(delta2_mfccs[i])
                features[f"delta2_mfcc_{i}_var"] = np.var(delta2_mfccs[i])

            # Spectral contrast
            spectral_contrast = librosa.feature.spectral_contrast(y=y_segment, sr=sr)
            for i in range(spectral_contrast.shape[0]):
                features[f"spectral_contrast_{i}_mean"] = np.mean(spectral_contrast[i])
                features[f"spectral_contrast_{i}_var"] = np.var(spectral_contrast[i])

            # Tonnetz
            tonnetz = librosa.feature.tonnetz(y=y_segment, sr=sr)
            for i in range(tonnetz.shape[0]):
                features[f"tonnetz_{i}_mean"] = np.mean(tonnetz[i])
                features[f"tonnetz_{i}_var"] = np.var(tonnetz[i])

            # Chroma CQT and CENS
            chroma_cqt = librosa.feature.chroma_cqt(y=y_segment, sr=sr)
            chroma_cqt_mean = np.mean(chroma_cqt, axis=1)
            chroma_cqt_var = np.var(chroma_cqt, axis=1)
            for i in range(len(chroma_cqt_mean)):
                features[f"chroma_cqt_mean_{i}"] = chroma_cqt_mean[i]
                features[f"chroma_cqt_var_{i}"] = chroma_cqt_var[i]

            chroma_cens = librosa.feature.chroma_cens(y=y_segment, sr=sr)
            chroma_cens_mean = np.mean(chroma_cens, axis=1)
            chroma_cens_var = np.var(chroma_cens, axis=1)
            for i in range(len(chroma_cens_mean)):
                features[f"chroma_cens_mean_{i}"] = chroma_cens_mean[i]
                features[f"chroma_cens_var_{i}"] = chroma_cens_var[i]

            # Chroma VQT
            chroma_vqt = librosa.feature.chroma_vqt(y=y_segment, sr=sr, intervals='equal')
            chroma_vqt_mean = np.mean(chroma_vqt, axis=1)
            chroma_vqt_var = np.var(chroma_vqt, axis=1)
            for i in range(len(chroma_vqt_mean)):
                features[f"chroma_vqt_mean_{i}"] = chroma_vqt_mean[i]
                features[f"chroma_vqt_var_{i}"] = chroma_vqt_var[i]

            # Mel spectrogram
            mel_spectrogram = librosa.feature.melspectrogram(y=y_segment, sr=sr)
            mel_spectrogram_mean = np.mean(mel_spectrogram, axis=1)
            mel_spectrogram_var = np.var(mel_spectrogram, axis=1)
            for i in range(len(mel_spectrogram_mean)):
                features[f"mel_spectrogram_mean_{i}"] = mel_spectrogram_mean[i]
                features[f"mel_spectrogram_var_{i}"] = mel_spectrogram_var[i]

            # Spectral flatness
            spectral_flatness = librosa.feature.spectral_flatness(y=y_segment)
            features["spectral_flatness_mean"] = np.mean(spectral_flatness)
            features["spectral_flatness_var"] = np.var(spectral_flatness)

            # Polynomial features
            poly_features = librosa.feature.poly_features(y=y_segment, sr=sr)
            poly_features_mean = np.mean(poly_features, axis=1)
            poly_features_var = np.var(poly_features, axis=1)
            for i in range(len(poly_features_mean)):
                features[f"poly_features_mean_{i}"] = poly_features_mean[i]
                features[f"poly_features_var_{i}"] = poly_features_var[i]

            # Tempo and tempogram
            tempo, _ = librosa.beat.beat_track(y=y_segment, sr=sr)
            features["tempo"] = tempo

            tempogram = librosa.feature.tempogram(y=y_segment, sr=sr)
            tempogram_mean = np.mean(tempogram, axis=1)
            tempogram_var = np.var(tempogram, axis=1)
            for i in range(len(tempogram_mean)):
                features[f"tempogram_mean_{i}"] = tempogram_mean[i]
                features[f"tempogram_var_{i}"] = tempogram_var[i]

            fourier_tempogram = librosa.feature.fourier_tempogram(y=y_segment, sr=sr)
            fourier_tempogram_mean = np.mean(fourier_tempogram, axis=1)
            fourier_tempogram_var = np.var(fourier_tempogram, axis=1)
            for i in range(len(fourier_tempogram_mean)):
                features[f"fourier_tempogram_mean_{i}"] = fourier_tempogram_mean[i]
                features[f"fourier_tempogram_var_{i}"] = fourier_tempogram_var[i]

            tempogram_ratio = librosa.feature.tempogram(y=y_segment, sr=sr)
            tempogram_ratio_mean = np.mean(tempogram_ratio, axis=1)
            tempogram_ratio_var = np.var(tempogram_ratio, axis=1)
            for i in range(len(tempogram_ratio_mean)):
                features[f"tempogram_ratio_mean_{i}"] = tempogram_ratio_mean[i]
                features[f"tempogram_ratio_var_{i}"] = tempogram_ratio_var[i]

            # Stack memory
            stack_memory = librosa.feature.stack_memory(y_segment)
            stack_memory_mean = np.mean(stack_memory, axis=1)
            stack_memory_var = np.var(stack_memory, axis=1)
            for i in range(len(stack_memory_mean)):
                features[f"stack_memory_mean_{i}"] = stack_memory_mean[i]
                features[f"stack_memory_var_{i}"] = stack_memory_var[i]
                
            yamnet_emb = get_yamnet_embedding(y_segment, sr)
            for i, val in enumerate(yamnet_emb):
                features[f"yamnet_emb_{i}"] = val
                
            # smile = opensmile.Smile(
            #     feature_set=opensmile.FeatureSet.eGeMAPSv02,
            #     feature_level=opensmile.FeatureLevel.LowLevelDescriptors
            # )
        
            # smile_features = smile.process_signal(y_segment, sr)
            # for key, value in smile_features.items():
            #     if isinstance(value, np.ndarray):
            #         features[key] = value.mean()
                    
            # features["opemsmile_mean"] = np.mean(smile_features)
            # features["opemsmile_var"] = np.var(smile_features)

            print(len(features))
            allSongs.append(features)
            print(f"Processed segment {segment_idx} of {filename} in genre {genre}")
            
            gc.collect()  # Run garbage collection to free memory

# Save to CSV
df = pd.DataFrame(allSongs)
os.makedirs(BASEPATH, exist_ok=True)
output_path = os.path.join(BASEPATH, "all_segmented_features.csv")
df.to_csv(output_path, index=False)
print(f"All features saved to {output_path}")

3472
Processed segment 1 of blues.00093.wav in genre blues
3472
Processed segment 2 of blues.00093.wav in genre blues
3472
Processed segment 3 of blues.00093.wav in genre blues
3472
Processed segment 4 of blues.00093.wav in genre blues
3472
Processed segment 5 of blues.00093.wav in genre blues
3472
Processed segment 6 of blues.00093.wav in genre blues
3472
Processed segment 7 of blues.00093.wav in genre blues
3472
Processed segment 8 of blues.00093.wav in genre blues
3472
Processed segment 9 of blues.00093.wav in genre blues
3472
Processed segment 10 of blues.00093.wav in genre blues
3472
Processed segment 1 of blues.00087.wav in genre blues
3472
Processed segment 2 of blues.00087.wav in genre blues


KeyboardInterrupt: 