In [1]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

# Root folder of the dataset; each sub-directory found here is treated as one genre label.
DATASET_PATH = "genres_original" 
# Destination file for the extracted feature table (one row per successfully processed clip).
OUTPUT_CSV = "gtzan_features.csv"

In [2]:
def load_audio(file_path, sr=22050):
    """Load an audio file and trim it with ``librosa.effects.trim``.

    Args:
        file_path: Path of the audio file to read.
        sr: Sampling rate handed to ``librosa.load``.

    Returns:
        Tuple ``(samples, sampling_rate)`` where ``samples`` is the trimmed
        signal and ``sampling_rate`` is the rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(file_path, sr=sr)
    # trim() also returns the kept interval; only the trimmed signal is needed here.
    trimmed, _kept_interval = librosa.effects.trim(signal)
    return trimmed, sample_rate

In [3]:
def extract_features(y, sr, n_mfcc=20, n_fft=2048, hop_length=512):
    """Summarise an audio signal as a flat dict of scalar features.

    Frame-level MFCC, zero-crossing rate, spectral centroid, spectral
    bandwidth, spectral rolloff and RMS energy are each reduced to their mean
    and standard deviation over all frames; a single tempo estimate is added
    at the end.

    Args:
        y: Audio samples, passed straight through to librosa.
        sr: Sampling rate of ``y``.
        n_mfcc: Number of MFCC coefficients to summarise.
        n_fft: Frame / FFT window length used by every framed feature.
        hop_length: Hop between successive frames, used by every framed
            feature and by the beat tracker.

    Returns:
        dict with keys ``mfcc_<k>_mean`` / ``mfcc_<k>_std`` for
        ``k = 1..n_mfcc``, then ``<name>_mean`` / ``<name>_std`` for ``zcr``,
        ``centroid``, ``bandwidth``, ``rolloff`` and ``rms``, then ``tempo``
        (in that insertion order).
    """
    features = {}

    # ---------- 1. MFCC ----------
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
    )
    for i in range(n_mfcc):
        mfcc_i = mfcc[i]
        # Stored as NumPy scalars (no float() cast) so these columns keep the
        # dtype they have always had in the resulting table.
        features[f"mfcc_{i+1}_mean"] = np.mean(mfcc_i)
        features[f"mfcc_{i+1}_std"] = np.std(mfcc_i)

    # ---------- 2. Zero Crossing Rate ----------
    # Framing is passed explicitly so ZCR stays on the same frame grid as the
    # other features when the caller overrides n_fft / hop_length.
    zcr = librosa.feature.zero_crossing_rate(
        y, frame_length=n_fft, hop_length=hop_length
    )
    _add_mean_std(features, "zcr", zcr)

    # ---------- 3. Spectral Centroid ----------
    centroid = librosa.feature.spectral_centroid(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length
    )
    _add_mean_std(features, "centroid", centroid)

    # ---------- 4. Spectral Bandwidth ----------
    bandwidth = librosa.feature.spectral_bandwidth(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length
    )
    _add_mean_std(features, "bandwidth", bandwidth)

    # ---------- 5. Spectral Rolloff ----------
    rolloff = librosa.feature.spectral_rolloff(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length
    )
    _add_mean_std(features, "rolloff", rolloff)

    # ---------- 6. RMS Energy ----------
    rms = librosa.feature.rms(y=y, frame_length=n_fft, hop_length=hop_length)
    _add_mean_std(features, "rms", rms)

    # ---------- 7. Tempo (BPM) ----------
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
    # beat_track may hand back the tempo as a 1-element array rather than a
    # plain number; calling float() on such an array is deprecated in recent
    # NumPy, so pick the first element explicitly before converting.
    features["tempo"] = float(np.ravel(tempo)[0])

    return features


def _add_mean_std(features, name, frames):
    """Store the mean and standard deviation of ``frames`` as Python floats under ``<name>_mean`` / ``<name>_std``."""
    features[f"{name}_mean"] = float(np.mean(frames))
    features[f"{name}_std"] = float(np.std(frames))

In [4]:
# Walk DATASET_PATH/<genre>/*.wav, extract one feature row per clip, and write
# the resulting table to OUTPUT_CSV. Re-running the cell rebuilds everything
# from scratch because both accumulators are reset here.
rows = []
failed = []  # (file_path, "ErrorType: message") for every clip that was skipped

# Only sub-directories count as genres, so stray files next to them are not
# reported as detected genres.
genres = sorted(
    entry
    for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)
print("Detected genres：", genres)

for genre in genres:
    genre_path = os.path.join(DATASET_PATH, genre)

    # Sorted so the row order of the CSV does not depend on the order in which
    # os.listdir happens to return entries; extension match is case-insensitive.
    files = sorted(f for f in os.listdir(genre_path) if f.lower().endswith(".wav"))
    print(f"\nProcessing genre: {genre}, files: {len(files)}")

    for file_name in tqdm(files):
        file_path = os.path.join(genre_path, file_name)

        try:
            y, sr = load_audio(file_path)
            feats = extract_features(y, sr)
            feats["label"] = genre
            feats["filename"] = file_name
            rows.append(feats)
        except Exception as e:
            # Best-effort: one unreadable clip must not abort the whole run.
            # The exception type is included because some errors carry an
            # empty message, which would otherwise print nothing useful.
            reason = f"{type(e).__name__}: {e}"
            failed.append((file_path, reason))
            print(f"Error processing {file_path}: {reason}")

if failed:
    print(f"\nSkipped {len(failed)} file(s) that could not be processed.")

df = pd.DataFrame(rows)
print("\nFeature matrix shape:", df.shape)
display(df.head())

df.to_csv(OUTPUT_CSV, index=False, encoding="utf-8")
print(f"\nSaved features to: {OUTPUT_CSV}")

Detected genres： ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

Processing genre: blues, files: 100


  features["tempo"] = float(tempo)
100%|██████████| 100/100 [00:24<00:00,  4.05it/s]



Processing genre: classical, files: 100


100%|██████████| 100/100 [00:22<00:00,  4.51it/s]



Processing genre: country, files: 100


100%|██████████| 100/100 [00:22<00:00,  4.48it/s]



Processing genre: disco, files: 100


100%|██████████| 100/100 [00:25<00:00,  3.96it/s]



Processing genre: hiphop, files: 100


100%|██████████| 100/100 [00:24<00:00,  4.13it/s]



Processing genre: jazz, files: 100


  y, sr = librosa.load(file_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing genres_original\jazz\jazz.00054.wav: 


100%|██████████| 100/100 [00:24<00:00,  4.06it/s]



Processing genre: metal, files: 100


100%|██████████| 100/100 [00:21<00:00,  4.64it/s]



Processing genre: pop, files: 100


100%|██████████| 100/100 [00:22<00:00,  4.48it/s]



Processing genre: reggae, files: 100


100%|██████████| 100/100 [00:22<00:00,  4.41it/s]



Processing genre: rock, files: 100


100%|██████████| 100/100 [00:26<00:00,  3.75it/s]


Feature matrix shape: (999, 53)





Unnamed: 0,mfcc_1_mean,mfcc_1_std,mfcc_2_mean,mfcc_2_std,mfcc_3_mean,mfcc_3_std,mfcc_4_mean,mfcc_4_std,mfcc_5_mean,mfcc_5_std,...,centroid_std,bandwidth_mean,bandwidth_std,rolloff_mean,rolloff_std,rms_mean,rms_std,tempo,label,filename
0,-113.598824,50.688946,121.570671,17.200207,-19.162262,15.348761,42.363937,12.289782,-6.362266,12.961207,...,360.202005,2002.412407,292.975102,3805.72303,949.343413,0.130184,0.053183,123.046875,blues,blues.00000.wav
1,-207.523834,88.142525,123.985138,23.662491,8.947019,23.923552,35.867149,16.270117,2.909594,16.732485,...,613.11949,2038.987608,462.49876,3550.713616,1725.778347,0.095908,0.048711,67.999589,blues,blues.00001.wav
2,-90.757164,57.601101,140.440872,22.55784,-29.084547,20.29937,31.686693,11.998093,-13.976547,12.476432,...,395.564168,1747.754087,276.216244,3042.410115,885.511646,0.175473,0.052449,161.499023,blues,blues.00002.wav
3,-199.575134,74.217697,150.086105,21.361393,5.663404,16.034643,26.855278,12.584162,1.770071,16.369904,...,429.378632,1596.422564,408.107638,2184.879029,1221.915647,0.14104,0.079672,63.024009,blues,blues.00003.wav
4,-160.354172,72.104813,126.20948,29.210808,-35.581394,18.276552,22.139256,13.919527,-32.473549,18.341904,...,585.874983,1748.410758,297.28556,3579.957471,1253.928347,0.091501,0.048013,135.999178,blues,blues.00004.wav



Saved features to: gtzan_features.csv
