In [3]:
import os

idmt_dataset_path = "data"

datapaths = []

for root, _, files in os.walk(idmt_dataset_path):  
    for filename in files:  # loop through files in the current directory
        if filename.endswith(".wav"):
            datapaths += [os.path.join(root, filename)]


# 20592 monophonic guitar notes
# 13860 polyphonic guitar sounds
# 34452 total guitar sounds
print(len(datapaths))
print(datapaths[:3])

34452
['data/idmt-smt-audio/Gitarre polyphon/Samples/EQ/P95-56270-1121-42023.wav', 'data/idmt-smt-audio/Gitarre polyphon/Samples/EQ/P64-49160-1122-42106.wav', 'data/idmt-smt-audio/Gitarre polyphon/Samples/EQ/P95-57210-1121-41934.wav']


In [None]:
import os
import csv

# Mappings
instrument_types = {
    "B": "Bass",
    "G": "Guitar"
}

instrument_models = {
    "1": "Yamaha BB604, setting 1",
    "2": "Yamaha BB604, setting 2",
    "3": "Warwick Corvette $$, setting 1",
    "4": "Warwick Corvette $$, setting 2",
    "6": "Schecter Diamond C-1 Classic, setting 1",
    "7": "Schecter Diamond C-1 Classic, setting 2",
    "8": "Chester Stratocaster, setting 1",
    "9": "Chester Stratocaster, setting 1"
}

playing_techniques = {
    "1": "Fingerstyle (soft/normal)",
    "2": "Fingerstyle (loud)",
    "3": "Pick",
    "4": "Pick (intervals)",  # polyphonic only
    "5": "Pick (triads/tetrads)"  # polyphonic only
}

polyphony_types = {
    "11": "Minor third",
    "12": "Major third",
    "13": "Perfect fourth",
    "14": "Perfect fifth",
    "15": "Minor seventh",
    "16": "Major seventh",
    "17": "Octave",
    "21": "Major triad",
    "22": "Minor triad",
    "23": "Sus4 triad",
    "24": "Power chord",
    "25": "Major seventh chord",
    "26": "Minor major seventh chord",
    "27": "Minor seventh chord"
}

effect_groups = {
    "1": "No Effect",
    "2": "Spatial Effect",
    "3": "Modulation Effect",
    "4": "Distortion Effect"
}

effects = {
    "11": "No Effect",
    "12": "No Effect, Amp Simulation",
    "21": "Feedback Delay",
    "22": "Slapback Delay",
    "23": "Reverb",
    "31": "Chorus",
    "32": "Flanger",
    "33": "Phaser",
    "34": "Tremolo",
    "35": "Vibrato",
    "41": "Distortion",
    "42": "Overdrive"
}

metadata = []

for filename in datapaths:
    if not (filename.endswith(".wav")):
        continue

    name = os.path.splitext(filename.split("/")[-1])[0]
    # print(name)
    parts = name.split('-')
    if len(parts) != 4:
        print(f"Skipping malformed file: {filename}")
        continue

    try:
        abc = parts[0]
        ddef = parts[1]
        ghhi = parts[2]
        file_id = parts[3]

        instrument_type = instrument_types.get(abc[0], "Unknown")
        instrument_model = instrument_models.get(abc[1], "Unknown")
        playing_technique = playing_techniques.get(abc[2], "Unknown")

        effect_group_id = ghhi[0]
        effect_id = ghhi[1:3]
        effect_setting = ghhi[3]
        effect_group_name = effect_groups.get(effect_group_id, "Unknown")
        effect_name = effects.get(effect_id, "Unknown")

        # Monophonic
        if playing_technique in ["Fingerstyle (soft/normal)", "Fingerstyle (loud)", "Pick"]:
            midi_pitch = ddef[0:2]
            string_number = ddef[2]
            fret_number = ddef[3:5]
            polyphony_type = ""

        # Polyphonic
        else:
            midi_pitch = ddef[0:2]
            polyphony_code = ddef[2:4]
            polyphony_type = polyphony_types.get(polyphony_code, "Unknown")
            string_number = ""
            fret_number = ""

        metadata.append({
            "filename": filename,
            "instrument_type": instrument_type,
            "instrument_model": instrument_model,
            "playing_technique": playing_technique,
            "midi_pitch": midi_pitch,
            "string_number": string_number,
            "fret_number": fret_number,
            "polyphony_type": polyphony_type,
            "effect_group_id": effect_group_id,
            "effect_group_name": effect_group_name,
            "effect_id": effect_id,
            "effect_name": effect_name,
            "effect_setting": effect_setting,
            "file_id": file_id
        })

    except Exception as e:
        print(f"Error parsing {filename}: {e}")
        continue

# Save to CSV
csv_file = "data/metadata.csv"
if metadata:
    with open(csv_file, "w", newline='', encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=metadata[0].keys())
        writer.writeheader()
        writer.writerows(metadata)
    print(f"Metadata saved to {csv_file}")
else:
    print("No metadata found. CSV not created.")


Metadata saved to data/metadata.csv
