Import Libraries

In [10]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

In [11]:
data_path = "/Users/js/Desktop/Music Genre Classification/Data/genres_original" 
output_csv = "/Users/js/Desktop/Music Genre Classification/Data/music_features.csv"

Functions

- Load the audio files

In [12]:
def load_audio(file_path, duration=30):
    """Loads an audio file and returns the audio time series and sampling rate."""
    try:
        y, sr = librosa.load(file_path, duration=duration)
        return y, sr
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None, None

- Extract the features

In [13]:
def extract_features(y, sr):
    """Extracts multiple audio features from an audio time series."""
    if y is None or sr is None:
        return None
    
    try:
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)
        tempo = librosa.beat.tempo(y=y, sr=sr)[0]
        mel_spec = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        
        return np.hstack([mfccs, chroma, spectral_contrast, tempo, mel_spec])
    except Exception as e:
        print(f"Error extracting features: {e}")
        return None

- Process the audio files

In [14]:
def process_audio_files(data_path):
    """Iterates through all audio files in genre folders and extracts features."""
    data = []
    genres = os.listdir(data_path)
    
    for genre in tqdm(genres, desc="Processing genres"):
        genre_path = os.path.join(data_path, genre)
        if os.path.isdir(genre_path):
            for file in os.listdir(genre_path):
                if file.endswith(".wav"):
                    file_path = os.path.join(genre_path, file)
                    y, sr = load_audio(file_path)
                    features = extract_features(y, sr)
                    if features is not None:
                        data.append([genre] + list(features))
    return data

- Save the extractes features

In [15]:
def save_features_to_csv(data, output_csv):
    """Saves extracted features to a CSV file."""
    columns = ["Genre"] + [f"MFCC_{i}" for i in range(13)] + [f"Chroma_{i}" for i in range(12)] + \
              [f"Spectral_Contrast_{i}" for i in range(7)] + ["Tempo"] + [f"Mel_{i}" for i in range(128)]
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"Feature extraction complete! Data saved to {output_csv}")

In [16]:
if __name__ == "__main__":
    data = process_audio_files(data_path)
    save_features_to_csv(data, output_csv)

	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  tempo = librosa.beat.tempo(y=y, sr=sr)[0]
Processing genres: 100%|██████████| 10/10 [03:13<00:00, 19.34s/it]

Feature extraction complete! Data saved to /Users/js/Desktop/Music Genre Classification/Data/music_features.csv



