## **Libraries**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [28]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [13]:
cqt_dir="/content/drive/MyDrive/Automatic Guitar Transcription/Data Set/GuitarSet/cqt"
json_dir="/content/drive/MyDrive/Automatic Guitar Transcription/Data Set/GuitarSet/merge_annotation"

## **Dataset**

In [29]:
class FretnetSlidingDataset(Dataset):
    def __init__(self, cqt, labels, context_size=9):
        self.cqt = cqt  # (6, T, freq)
        self.labels = labels  # (T, ...)
        self.context_size = context_size
        self.pad = context_size // 2
        self.T = min(cqt.shape[1], labels.shape[0])
        self.cqt_padded = np.pad(cqt, ((0,0),(self.pad,self.pad),(0,0)), mode='edge')

    def __len__(self):
        return self.T

    def __getitem__(self, idx):
        x = self.cqt_padded[:, idx:idx+self.context_size, :]
        y = self.labels[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


In [30]:
def load_data_sample(cqt_path, json_path, num_strings=6, num_frets=19):
    cqt = np.load(cqt_path)  # (6, T, freq)
    with open(json_path, 'r') as f:
        data = json.load(f)

    frame_count = len(data["times"])

    tablature = np.array(data["tablature"]).T
    Y_tab = np.zeros((frame_count, num_strings * (num_frets + 1)), dtype=np.float32)
    for t in range(frame_count):
        for s in range(num_strings):
            f = tablature[t][s]
            if 0 <= f <= num_frets:
                idx_label = s * (num_frets + 1) + int(f)
                Y_tab[t, idx_label] = 1.0

    min_len = min(cqt.shape[1], frame_count)
    cqt = cqt[:, :min_len, :]
    Y_tab = Y_tab[:min_len]

    return cqt, Y_tab

In [31]:
cqt_dir = "/content/drive/MyDrive/Automatic Guitar Transcription/Data Set/GuitarSet/cqt"
json_dir = "/content/drive/MyDrive/Automatic Guitar Transcription/Data Set/GuitarSet/merge_annotation"

cqt_path = os.path.join(cqt_dir, "00_BN1-129-Eb_comp_mix_cqt.npy")
json_path = os.path.join(json_dir, "00_BN1-129-Eb_comp_mix_fretnet.json")

cqt, labels = load_data_sample(cqt_path, json_path)

dataset = FretnetSlidingDataset(cqt, labels, context_size=9)
loader = DataLoader(dataset, batch_size=4, shuffle=True)

for xb, yb in loader:
    print("X batch:", xb.shape)  # (B, 6, 9, 144)
    print("Y batch:", yb.shape)  # (B, 120)
    break

X batch: torch.Size([4, 6, 9, 144])
Y batch: torch.Size([4, 120])


## **Model**

In [32]:
class SimpleFretNet(nn.Module):
    def __init__(self, num_strings=6, num_frets=19):
        super().__init__()
        self.num_outputs = num_strings * (num_frets + 1)
        self.conv1 = nn.Conv2d(6, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.fc_shared = nn.Linear(32 * 2 * 36, 256)
        self.fc_tab = nn.Linear(256, self.num_outputs)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc_shared(x))
        tab_logits = self.fc_tab(x)  # sigmoid kaldırıldı
        return tab_logits


## **Train**

In [33]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleFretNet().to(device)  # Model önce tanımlanmalı
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Sonra optimizer oluşturulmalı
loss_fn = nn.BCEWithLogitsLoss()  # Sigmoid içeren loss

In [34]:

def train_one_epoch(model, loader, optimizer):
    model.train()
    total_loss = 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        outputs = model(xb)
        loss = loss_fn(outputs, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * xb.size(0)
    return total_loss / len(loader.dataset)


In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleFretNet().to(device)

num_epochs = 10
for epoch in range(num_epochs):
    loss = train_one_epoch(model, loader, optimizer)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {loss:.4f}")

Epoch 1/10 - Loss: 0.7062
Epoch 2/10 - Loss: 0.7063
Epoch 3/10 - Loss: 0.7062
Epoch 4/10 - Loss: 0.7063
Epoch 5/10 - Loss: 0.7063
Epoch 6/10 - Loss: 0.7063
Epoch 7/10 - Loss: 0.7063
Epoch 8/10 - Loss: 0.7063
Epoch 9/10 - Loss: 0.7062
Epoch 10/10 - Loss: 0.7062


In [25]:
for xb, yb in loader:
    print("Input shape:", xb.shape)
    print("Label shape:", yb.shape)
    print("Label sample values:", yb[0][:10])
    break

outputs = model(xb.to(device))
print("Output sample:", outputs[0][:10])


Input shape: torch.Size([4, 6, 9, 144])
Label shape: torch.Size([4, 120])
Label sample values: tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
Output sample: tensor([0.5146, 0.4445, 0.5171, 0.4918, 0.5543, 0.4667, 0.5434, 0.4878, 0.5191,
        0.6355], grad_fn=<SliceBackward0>)
