In [64]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

# Mount drive
from google.colab import drive
drive.mount('/content/drive/')

# Path to your dataset
dataset1_path = '/content/drive/MyDrive/guitar-dataset/IDMT-SMT-GUITAR_V2/dataset1'
print("Contents of", dataset1_path)
print(os.listdir(dataset1_path))

# Load audio dataset UNBATCHED
train_dataset, test_dataset = tf.keras.utils.audio_dataset_from_directory(
    directory=dataset1_path,
    batch_size=None,  # <-- IMPORTANT
    validation_split=0.2,
    seed=0,
    output_sequence_length=16000,
    subset='both'
)

# Inspect example
for audio, label in train_dataset.take(1):
    print("Raw audio shape:", audio.shape)   # should be (16000, 1)
    print("Label:", label.numpy())


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
Contents of /content/drive/MyDrive/guitar-dataset/IDMT-SMT-GUITAR_V2/dataset1
['Ibanez Power Strat Clean Bridge HU', 'Ibanez Power Strat Clean Neck HU', 'Ibanez Power Strat Clean Bridge HU Chords', 'Fender Strat Clean Neck SC Chords', 'Ibanez Power Strat Clean Bridge+Neck SC', 'Fender Strat Clean Neck SC']
Found 400 files belonging to 6 classes.
Using 320 files for training.
Using 80 files for validation.
Raw audio shape: (16000, 1)
Label: 3


In [65]:
import os
import re
import shutil

# Source: original dataset1 (instrument / pickup folders etc.)
src_root = "/content/drive/MyDrive/guitar-dataset/IDMT-SMT-GUITAR_V2/dataset1"

# Destination: new dataset grouped by chord label
dst_root = "/content/drive/MyDrive/guitar-dataset/chords_by_label"
os.makedirs(dst_root, exist_ok=True)

print("Source root:", src_root)
print("Destination root:", dst_root)

Source root: /content/drive/MyDrive/guitar-dataset/IDMT-SMT-GUITAR_V2/dataset1
Destination root: /content/drive/MyDrive/guitar-dataset/chords_by_label


In [66]:
def get_chord_label_from_filename(filename: str) -> str | None:
    """
    Extract chord label from filenames like:
    '1-E1-Major 00.wav', '2-E1-Minor 01.wav', '3-A2-Major 06.wav'

    Returns labels like: 'E_major', 'A_minor', etc.
    """
    # Remove extension
    base = os.path.splitext(filename)[0]   # e.g. '1-E1-Major 00'

    # Take just the part before the space: '1-E1-Major'
    first_part = base.split(" ")[0]

    # Pattern: <index>-<Note><something>-<Major|Minor>
    # Example: 1-E1-Major  → groups: 'E1', 'Major'
    m = re.match(r"^\d+-([A-G][b#]?\d*)-(Major|Minor)", first_part)
    if not m:
        return None

    note_token = m.group(1)   # 'E1', 'A2', etc.
    quality = m.group(2)      # 'Major' or 'Minor'

    # Root note: letter + optional accidental, ignore trailing digit(s)
    m_note = re.match(r"^([A-G][b#]?)", note_token)
    if not m_note:
        return None

    root = m_note.group(1)  # 'E', 'A', 'Eb', 'F#', ...
    label = f"{root}_{quality.lower()}"   # e.g. 'E_major'
    return label

# Quick sanity check
test_names = [
    "1-E1-Major 00.wav",
    "2-E1-Minor 07.wav",
    "3-A2-Major 06.wav",
]

for name in test_names:
    print(name, "→", get_chord_label_from_filename(name))


1-E1-Major 00.wav → E_major
2-E1-Minor 07.wav → E_minor
3-A2-Major 06.wav → A_major


In [67]:
#THIS CODE PUTS CHORDS INTO FOLDERS
copied_count = 0
skipped_count = 0

for dirpath, dirnames, filenames in os.walk(src_root):
    # Only look inside 'audio' folders
    if os.path.basename(dirpath).lower() != "audio":
        continue

    print("Scanning:", dirpath)
    for fname in filenames:
        if not fname.lower().endswith(".wav"):
            continue

        chord_label = get_chord_label_from_filename(fname)
        if chord_label is None:
            print("  [SKIP] Could not parse chord from:", fname)
            skipped_count += 1
            continue

        # Make destination folder for this chord
        dst_dir = os.path.join(dst_root, chord_label)
        os.makedirs(dst_dir, exist_ok=True)

        src_path = os.path.join(dirpath, fname)
        dst_path = os.path.join(dst_dir, fname)

        # Copy file (use copy2 to preserve metadata)
        shutil.copy2(src_path, dst_path)
        copied_count += 1

print("\nDone.")
print("Copied:", copied_count, "files")
print("Skipped (no chord parsed):", skipped_count)


Scanning: /content/drive/MyDrive/guitar-dataset/IDMT-SMT-GUITAR_V2/dataset1/Ibanez Power Strat Clean Bridge HU/audio
  [SKIP] Could not parse chord from: G53-49204-1111-252.wav
  [SKIP] Could not parse chord from: G53-45200-1111-248.wav
  [SKIP] Could not parse chord from: G53-48203-1111-251.wav
  [SKIP] Could not parse chord from: G53-47202-1111-250.wav
  [SKIP] Could not parse chord from: G53-46201-1111-249.wav
  [SKIP] Could not parse chord from: G53-51206-1111-254.wav
  [SKIP] Could not parse chord from: G53-54209-1111-257.wav
  [SKIP] Could not parse chord from: G53-61502-1111-289.wav
  [SKIP] Could not parse chord from: G53-57212-1111-260.wav
  [SKIP] Could not parse chord from: G53-68509-1111-296.wav
  [SKIP] Could not parse chord from: G53-60501-1111-288.wav
  [SKIP] Could not parse chord from: G53-62503-1111-290.wav
  [SKIP] Could not parse chord from: G53-52207-1111-255.wav
  [SKIP] Could not parse chord from: G53-50205-1111-253.wav
  [SKIP] Could not parse chord from: G53-55

In [68]:
chord_dataset_path = "/content/drive/MyDrive/guitar-dataset/chords_by_label"

train_dataset, test_dataset = tf.keras.utils.audio_dataset_from_directory(
    directory=chord_dataset_path,
    batch_size=None,
    validation_split=0.2,
    seed=0,
    output_sequence_length=16000,
    subset='both'
)

class_names = train_dataset.class_names
print("Chord classes:", class_names)

for audio, label in train_dataset.take(1):
    print("Raw audio shape:", audio.shape)
    print("Label index:", label.numpy())
    print("Label name:", class_names[label.numpy()])


Found 44 files belonging to 4 classes.
Using 36 files for training.
Using 8 files for validation.
Chord classes: ['A_major', 'A_minor', 'E_major', 'E_minor']
Raw audio shape: (16000, 1)
Label index: 3
Label name: E_minor


In [69]:
for audio, label in train_dataset.take(5):
    print("Audio shape:", audio.shape)   # (16000, 1)
    print("Label:", label.numpy())       # scalar like 0,1,2...

Audio shape: (16000, 1)
Label: 0
Audio shape: (16000, 1)
Label: 0
Audio shape: (16000, 1)
Label: 2
Audio shape: (16000, 1)
Label: 2
Audio shape: (16000, 1)
Label: 1


### Create a function to convert audio to spectrograms

In [70]:
def get_spectrogram(waveform):
    waveform = tf.squeeze(waveform, axis=-1)  # (16000,) from (16000,1)

    spectrogram = tf.signal.stft(
        waveform,
        frame_length=255,
        frame_step=128
    )
    spectrogram = tf.abs(spectrogram)
    spectrogram = spectrogram[..., tf.newaxis]  # add channel dim
    spectrogram = tf.image.resize(spectrogram, [124, 128])

    return spectrogram


In [71]:
def make_spec_ds(ds, batch_size=64):
    ds = ds.map(
        lambda audio, label: (get_spectrogram(audio), label),
        num_parallel_calls=tf.data.AUTOTUNE
    )
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

train_spectrogram_ds = make_spec_ds(train_dataset)
test_spectrogram_ds = make_spec_ds(test_dataset)

# Inspect a batch
for specs, labels in train_spectrogram_ds.take(1):
    print("Spectrogram batch shape:", specs.shape)
    print("Labels batch shape:", labels.shape)
    break


Spectrogram batch shape: (36, 124, 128, 1)
Labels batch shape: (36,)


CNN

In [72]:
num_classes = 6
input_shape_for_model = (124, 128, 1)

model = Sequential([
    tf.keras.layers.Input(shape=input_shape_for_model),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

model.summary()


In [73]:
EPOCHS = 5

history = model.fit(
    train_spectrogram_ds,
    validation_data=test_spectrogram_ds,
    epochs=EPOCHS,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True,
            verbose=1
        )
    ]
)


Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0556 - loss: 1.7906 - val_accuracy: 0.0000e+00 - val_loss: 1.8277
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step - accuracy: 0.3056 - loss: 1.5469 - val_accuracy: 0.0000e+00 - val_loss: 1.9249
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step - accuracy: 0.3056 - loss: 1.4214 - val_accuracy: 0.2500 - val_loss: 1.6130
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step - accuracy: 0.5556 - loss: 1.3012 - val_accuracy: 0.3750 - val_loss: 1.3358
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step - accuracy: 0.6389 - loss: 1.2286 - val_accuracy: 0.1250 - val_loss: 1.4500
Restoring model weights from the end of the best epoch: 4.


In [74]:
def load_and_preprocess(filepath):
    # Load .wav
    audio_binary = tf.io.read_file(filepath)
    audio, sr = tf.audio.decode_wav(audio_binary)

    # Squeeze to (samples,)
    audio = tf.squeeze(audio, axis=-1)

    # Pad/trim to 16000 samples (1 sec)
    audio = audio[:16000]
    audio = tf.pad(audio, [[0, max(0, 16000 - tf.shape(audio)[0])]])

    # Back to (16000, 1)
    audio = tf.expand_dims(audio, axis=-1)

    # Convert to spectrogram
    spec = get_spectrogram(audio)

    # Add batch dim → (1, 124, 128, 1)
    spec = tf.expand_dims(spec, axis=0)
    return spec

def predict_chord(filepath):
    spec = load_and_preprocess(filepath)
    pred = model.predict(spec)
    idx = np.argmax(pred)
    confidence = float(np.max(pred))

    print("Predicted class index:", idx)
    print("Predicted label:", class_names[idx])
    print("Confidence:", confidence)

In [75]:
# Get the class names from the training dataset folders
class_names = train_dataset.class_names
print("Class names:", class_names)

predict_chord("/content/drive/MyDrive/guitar-dataset/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC Chords/audio/3-A2-Major 01.wav")

Class names: ['A_major', 'A_minor', 'E_major', 'E_minor']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 421ms/step
Predicted class index: 1
Predicted label: A_minor
Confidence: 0.2706142067909241
