In [1]:
import json
import os
import math
import librosa

# Root folder that contains one sub-folder per genre. The location is
# machine-specific, so it can be overridden with the GTZAN_DATASET_PATH
# environment variable; the original path is kept as the default.
DATASET_PATH = os.environ.get(
    "GTZAN_DATASET_PATH",
    "/exchange/eds_ai_hs25/gtzan/Data/genres_original",
)
JSON_PATH = "data_gtzan_mfcc.json"   # output: extracted MFCCs + labels
BAD_FILES_PATH = "bad_files.txt"     # output: files that could not be loaded

SAMPLE_RATE = 22050      # Hz; sample rate requested when loading audio
TRACK_DURATION = 30      # seconds; every track is padded/trimmed to this length
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION


def save_mfcc(dataset_path, json_path, bad_files_path,
              num_mfcc=13, n_fft=2048, hop_length=512, num_segments=10):
    """Extract per-segment MFCCs from a folder-per-genre dataset and save them.

    Every ``.wav`` file found below ``dataset_path`` is loaded at
    ``SAMPLE_RATE``, padded or trimmed to ``SAMPLES_PER_TRACK`` samples, cut
    into ``num_segments`` equal segments, and each segment is converted to an
    MFCC matrix of shape ``(frames, num_mfcc)``.

    Args:
        dataset_path: Root directory; each sub-directory containing ``.wav``
            files is treated as one genre (class).
        json_path: Destination of the JSON file with the keys ``"mapping"``
            (genre names, index == label), ``"labels"`` (one int per segment)
            and ``"mfcc"`` (one nested list per segment).
        bad_files_path: Destination of a text file listing every file that
            could not be loaded, one ``path<TAB>error`` entry per line.
        num_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop length (in samples) passed to librosa.
        num_segments: Number of equal segments each track is split into.

    Raises:
        ValueError: If ``num_segments`` or ``hop_length`` is not positive.
    """
    if num_segments < 1:
        raise ValueError("num_segments must be a positive integer")
    if hop_length < 1:
        raise ValueError("hop_length must be a positive integer")

    data = {"mapping": [], "labels": [], "mfcc": []}
    bad_files = []  # dicts with "path" and "error" for every unreadable file

    samples_per_segment = SAMPLES_PER_TRACK // num_segments
    # librosa centers its frames by default (the signal is padded on both
    # sides), so a segment of n samples yields 1 + n // hop_length frames.
    # ceil(n / hop_length) is one short whenever n is an exact multiple of
    # hop_length, which would silently discard every segment.
    expected_num_frames = 1 + samples_per_segment // hop_length

    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # Sorting in place makes os.walk visit sub-directories in a fixed
        # order, so the genre -> label mapping is reproducible across machines.
        dirnames.sort()

        if dirpath == dataset_path:
            continue

        wav_files = sorted(f for f in filenames if f.lower().endswith(".wav"))
        if not wav_files:
            # Folders without audio (e.g. .ipynb_checkpoints) are not classes.
            continue

        genre = os.path.basename(dirpath)
        # The label is the genre's position in the mapping, so the two stay
        # aligned even when some directories are skipped.
        label = len(data["mapping"])
        data["mapping"].append(genre)
        print(f"\nProcessing genre: {genre}")

        for f in wav_files:
            file_path = os.path.join(dirpath, f)

            # Try loading; if it fails, log & skip
            try:
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
            except Exception as e:
                bad_files.append({"path": file_path, "error": repr(e)})
                print(f"SKIP (cannot read): {file_path}")
                continue

            # Ensure consistent track length (pad short clips, trim long ones)
            if len(signal) < SAMPLES_PER_TRACK:
                signal = librosa.util.fix_length(signal, size=SAMPLES_PER_TRACK)
            else:
                signal = signal[:SAMPLES_PER_TRACK]

            # Segment + MFCC extraction
            for d in range(num_segments):
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # Transpose so rows are frames and columns are coefficients.
                mfcc = librosa.feature.mfcc(
                    y=signal[start:finish],
                    sr=sr,
                    n_mfcc=num_mfcc,
                    n_fft=n_fft,
                    hop_length=hop_length
                ).T

                # Keep only segments with the expected number of frames so
                # every stored sample has the same shape.
                if len(mfcc) == expected_num_frames:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)

    # Save MFCC data
    with open(json_path, "w", encoding="utf-8") as fp:
        json.dump(data, fp, indent=2)

    # Save list of bad files (path + error) for inspection
    with open(bad_files_path, "w", encoding="utf-8") as fp:
        for item in bad_files:
            fp.write(f"{item['path']}\t{item['error']}\n")

    print("\nDone.")
    print(f"Saved MFCC JSON to: {json_path}")
    print(f"Bad files: {len(bad_files)} (written to {bad_files_path})")
    print(f"Total MFCC samples: {len(data['mfcc'])}")
    print(f"Num classes (folders): {len(data['mapping'])}")


if __name__ == "__main__":
    # Run the full extraction with the module-level configuration; each track
    # is split into 10 segments.
    save_mfcc(
        dataset_path=DATASET_PATH,
        json_path=JSON_PATH,
        bad_files_path=BAD_FILES_PATH,
        num_segments=10,
    )



Processing genre: blues

Processing genre: classical

Processing genre: country

Processing genre: disco

Processing genre: hiphop

Processing genre: jazz


  signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


SKIP (cannot read): /exchange/eds_ai_hs25/gtzan/Data/genres_original/jazz/jazz.00054.wav

Processing genre: metal

Processing genre: pop

Processing genre: reggae

Processing genre: rock

Done.
Saved MFCC JSON to: data_gtzan_mfcc.json
Bad files: 1 (written to bad_files.txt)
Total MFCC samples: 9990
Num classes (folders): 10
