In [1]:
import librosa
import pandas as pd
import os
from ipywidgets import IntProgress
from IPython.display import display, clear_output

In [2]:
# function to extract audio features
def extract_features(file_path):
    """Load one audio file and summarise three librosa features.

    Parameters
    ----------
    file_path
        Path of the audio file; passed unchanged to ``librosa.load`` (which is
        called with its default arguments).

    Returns
    -------
    list
        Six scalars, in this order: chroma_stft mean, chroma_stft variance,
        RMS mean, RMS variance, spectral-centroid mean, spectral-centroid
        variance.
    """
    y, sr = librosa.load(file_path)

    # Compute every feature matrix exactly once and derive both statistics
    # from it, instead of re-running the same analysis for mean and variance.
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

    # Zero-crossings and spectral roll-off are not part of the returned
    # vector, so they are not computed here.
    return [
        chroma_stft.mean(),
        chroma_stft.var(),
        rms.mean(),
        rms.var(),
        spectral_centroid.mean(),
        spectral_centroid.var(),
    ]

In [3]:
# Smoke test: run the extractor on a single track and print the six values.
sample_path = 'Dataset/MP3-Example/Latin/Latin-TRAFNQP128F92FAA26.mp3'
sample_features = extract_features(sample_path)
print(sample_features)

[np.float32(0.3924548), np.float32(0.086044624), np.float32(0.13579641), np.float32(0.0013561117), np.float64(2052.0418230378855), np.float64(114338.99866117635)]


In [7]:
# Full-dataset run: root folder that is walked, and the row accumulator.
drtr = 'Dataset/MP3-Example'  # directory searched recursively for .mp3 files
data = list()                 # one [track_id, *features] row is appended per track

In [8]:
# progress bar
# Count only the .mp3 files so the bar's maximum matches the number of files
# the extraction loop actually processes (it skips every other extension);
# counting all files would leave the bar short of 100% if anything else
# (e.g. hidden system files) lives under the dataset folder.
total_files = sum(
    1
    for _, _, files in os.walk(drtr)
    for file_name in files
    if file_name.endswith(".mp3")
)
progress = IntProgress(min=0, max=total_files)
display(progress)

IntProgress(value=0, max=1500)

In [9]:

# accessing through all subdirectories
# NOTE: rows are appended to the existing `data` list, so running this cell
# again without resetting `data` first adds every track a second time.
for genre_folder, subdirs, files in os.walk(drtr):
    for file_name in files:
        if file_name.endswith(".mp3"):
            # Get full path to the MP3 files
            file_path = os.path.join(genre_folder, file_name)
            # Extract track_id from the file name ('genre-track_id.mp3' format).
            # Split on the LAST '-' so a hyphen inside the genre part
            # (e.g. 'Hip-Hop-<track_id>.mp3') is not mistaken for the separator.
            track_id = file_name.rsplit('-', 1)[1].split('.')[0]
            # Extract features
            features = extract_features(file_path)
            # Append track_id and features to data
            data.append([track_id] + features)

            # keep track of progress: bump the counter, then replace this
            # cell's output with the refreshed bar
            progress.value += 1
            clear_output(wait=True)
            display(progress)

IntProgress(value=1500, max=1500)

In [10]:
# save in CSV
# Make sure the output folder exists first: to_csv does not create missing
# directories and would fail on a fresh checkout.
os.makedirs('GeneratedData', exist_ok=True)
df = pd.DataFrame(
    data,
    columns=[
        'track_id',
        'chroma_stft_mean',
        'chroma_stft_var',
        'rms_mean',
        'rms_var',
        'spectral_centroid_mean',
        'spectral_centroid_var',
    ],
)
# index=False keeps the DataFrame's row index out of the file.
df.to_csv('GeneratedData/chroma_rms_spectral_centroid.csv', index=False)

In [11]:
# Report that the run finished and name the CSV path used by the save step.
print("Feature extraction complete. Data saved to 'GeneratedData/chroma_rms_spectral_centroid.csv'.")

Feature extraction complete. Data saved to 'GeneratedData/chroma_rms_spectral_centroid.csv'.
