In [1]:
import os
import numpy as np
import librosa
from tqdm import tqdm

In [2]:
# Relative path to the dataset root. The loading loop in this notebook treats
# each sub-directory of this folder as one genre and reads its .wav files.
DATASET_PATH = "../Data/Raw/genres_original"

In [5]:
def extract_features(file_path, sr=22050, duration=30):
    """Load one audio file and summarise it as a 1-D feature vector.

    The clip is loaded as mono and, when ``duration`` is given, zero-padded or
    truncated to exactly ``duration`` seconds so every file yields frames over
    the same time span. Features are appended in this order:

    * MFCC (13 coefficients): per-coefficient mean, then per-coefficient std
    * spectral centroid, bandwidth, rolloff, contrast: one overall mean and
      one overall std each (contrast is averaged over all bands and frames)
    * zero-crossing rate: mean, std
    * RMS energy: mean, std
    * chroma STFT: per-bin mean
    * tempo estimate (single value)

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read.
    sr : int or None, default 22050
        Target sampling rate forwarded to ``librosa.load``.
    duration : float or None, default 30
        Clip length in seconds. Fractional values are supported. ``None``
        loads the whole file and skips the pad/trim step.

    Returns
    -------
    numpy.ndarray or None
        ``float32`` vector of features, or ``None`` if anything failed while
        loading or analysing the file (the error is printed, not raised).
    """
    try:
        y, sr = librosa.load(file_path, sr=sr, mono=True, duration=duration)

        if duration is not None:
            # sr * duration is a float for fractional durations; np.pad and
            # slicing both require an integer sample count.
            expected_length = int(round(sr * duration))
            if len(y) < expected_length:
                y = np.pad(y, (0, expected_length - len(y)))
            else:
                y = y[:expected_length]

        features = []

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features.extend(np.mean(mfcc, axis=1))
        features.extend(np.std(mfcc, axis=1))

        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, fmin=200.0)

        # Each spectral descriptor is reduced to a single mean/std pair taken
        # over the whole array (no axis), including the multi-band contrast.
        for spec in [
            spectral_centroid,
            spectral_bandwidth,
            spectral_rolloff,
            spectral_contrast
        ]:
            features.append(np.mean(spec))
            features.append(np.std(spec))

        zcr = librosa.feature.zero_crossing_rate(y)
        rms = librosa.feature.rms(y=y)

        features.append(np.mean(zcr))
        features.append(np.std(zcr))
        features.append(np.mean(rms))
        features.append(np.std(rms))

        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        features.extend(np.mean(chroma, axis=1))

        # tempo() returns an array; take its first element as the estimate.
        tempo = librosa.feature.rhythm.tempo(y=y, sr=sr)[0]
        features.append(tempo)

        return np.array(features, dtype=np.float32)

    except Exception as e:
        # Deliberate best-effort: callers treat None as "skip this file".
        print(f"ERROR processing {file_path}")
        print(e)
        return None

In [6]:
# Smoke-test the extractor on one clip before processing the whole dataset.
# The path is built from DATASET_PATH so it follows any change to that constant.
test_file = os.path.join(DATASET_PATH, "pop", "pop.00027.wav")
features = extract_features(test_file)

print(features.shape if features is not None else "Failed")

(51,)


In [7]:
# Build the feature matrix X and integer label vector y by walking every
# genre folder under DATASET_PATH. Files whose extraction fails are recorded
# in failed_files and left out of X / y.
X = []
y = []
failed_files = []

# Sorted folder names give a stable genre -> integer label mapping
# (label == index of the genre in this list).
genres = sorted([
    g for g in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, g))
])

for label, genre in enumerate(genres):
    genre_path = os.path.join(DATASET_PATH, genre)

    # os.listdir returns entries in arbitrary order, so sort to make the row
    # order of X / y reproducible. Filtering first also makes the progress
    # bar total count only the .wav files that are actually processed.
    wav_files = sorted(
        name for name in os.listdir(genre_path)
        if name.lower().endswith(".wav")
    )

    for file in tqdm(wav_files, desc=f"Processing {genre}"):
        file_path = os.path.join(genre_path, file)
        features = extract_features(file_path)

        if features is None:
            failed_files.append(file_path)
            continue

        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)

print("Final dataset shape:", X.shape)
print("Corrupted files skipped:", len(failed_files))

Processing blues: 100%|███████████████████████| 100/100 [00:16<00:00,  5.93it/s]
Processing classical: 100%|███████████████████| 100/100 [00:16<00:00,  5.93it/s]
Processing country: 100%|█████████████████████| 100/100 [00:16<00:00,  5.92it/s]
Processing disco: 100%|███████████████████████| 100/100 [00:17<00:00,  5.85it/s]
Processing hiphop: 100%|██████████████████████| 100/100 [00:16<00:00,  5.92it/s]
Processing jazz: 100%|██████████████████████████| 99/99 [00:16<00:00,  5.85it/s]
Processing metal: 100%|███████████████████████| 100/100 [00:17<00:00,  5.82it/s]
Processing pop: 100%|█████████████████████████| 100/100 [00:17<00:00,  5.75it/s]
Processing reggae: 100%|██████████████████████| 100/100 [00:17<00:00,  5.65it/s]
Processing rock: 100%|████████████████████████| 100/100 [00:17<00:00,  5.69it/s]

Final dataset shape: (999, 51)
Corrupted files skipped: 0





In [8]:
from pathlib import Path

# BASE_DIR is the parent of the current working directory (presumably the
# project root, given the "../Data" paths used elsewhere — confirm if the
# notebook is launched from a different folder).
BASE_DIR = Path.cwd().parent

# Output folder for processed artefacts; created on demand, no error if present.
PROCESSED_DIR = BASE_DIR.joinpath("Data", "Processed")
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

print("Processed data will be saved to:", PROCESSED_DIR, sep="\n")

Processed data will be saved to:
/Users/apple/ml-env/Audio_Genre/Data/Processed


In [9]:
import json

# Summary of the extracted dataset, written next to the processed data so
# later steps can check shapes and decode labels without re-running extraction.
metadata = {
    "num_samples": int(X.shape[0]),
    "num_features": int(X.shape[1]),
    "num_classes": len(np.unique(y)),
    # Position in this list is the integer label stored in y; without it the
    # label -> genre mapping would be lost once the notebook is closed.
    "class_names": list(genres),
    "feature_description": {
        "mfcc": "13 MFCCs (mean + std)",
        "spectral": ["centroid", "bandwidth", "rolloff", "contrast"],
        "time_domain": ["zcr", "rms"],
        "chroma": "12-bin chroma STFT (mean)",
        "tempo": "global tempo estimate"
    }
}

# Explicit encoding keeps the file identical regardless of the platform default.
with open(PROCESSED_DIR / "metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=4)

print("metadata.json saved")

metadata.json saved
