In [1]:
from torch import nn

class TinyCNN(nn.Module):
    def __init__(self, num_classes=50):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((12, 32)),
            nn.Flatten(),
            nn.Linear(32 * 12 * 32, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        return self.model(x)

In [2]:
import torch
import matplotlib.pyplot as plt

def show_mel(mel, label):
    plt.imshow(mel.squeeze(), origin='lower', aspect='auto', cmap='magma')
    plt.title(f"Genres: {torch.nonzero(label).squeeze().tolist()}")
    plt.colorbar()
    plt.show()

In [3]:
from data.procesing import ParseData

ParseData("raw_30s_cleantags_125artists", "E:/SongsDataset/raw_30s_melspecs/", "E:/SongsDataset/tiny-melspec-dataset", features=96, chunks_per_batch=4096, chunk_size=256, per_label=25, labels_to_include=10, chunks_per_song=3)

Reading: 55215 tracks, 11217 albums, 3552 artists
There are 50 genres in this partition.
There are 15 moods/themes in this partition.
There are 20 instruments in this partition.


100%|██████████| 55215/55215 [00:03<00:00, 17575.94it/s]


In [4]:
from torch.utils.data import Dataset
import torch
class SmallDataset(Dataset):
    def __init__(self, songs, labels):
        self.songs = songs
        self.labels = labels

    def __len__(self):
        return len(self.songs)

    def __getitem__(self, idx):
        return self.songs[idx], self.labels[idx]

songs = torch.load("E:/SongsDataset/tiny-melspec-dataset/data/0686.pt")
labels = torch.load("E:/SongsDataset/tiny-melspec-dataset/genre_labels/0686.pt")

tiny_dataset = SmallDataset(songs, labels)

In [5]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    tiny_dataset,
    batch_size=10,
    shuffle=True
)

In [6]:
from tqdm import tqdm
from torch import optim
import torch.optim
import numpy as np

def train(model, train_dataloader, config):
    # Training setup
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)  # L2 regularization
    model.to("cuda", config.dtype)

    # Training loop
    for epoch in range(40):
        train_loss_total = 0
        for batch in tqdm(train_dataloader):
            inputs, labels = batch

            inputs = inputs.squeeze(0).unsqueeze(1).to("cuda", config.dtype)
            labels = labels.squeeze(0).to("cuda", config.dtype)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss_total += loss.item()

        train_loss_average = train_loss_total / len(train_dataloader)

        print(f"Epoch {epoch + 1}, Train Loss: {train_loss_average:.4f}")

        torch.save(model, f".\\{config.save_path}\\Classifier-Epoch-{epoch + 1}.pt")

def evaluate(model, dataloader, criterion, config):
    test_loss_total = 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(dataloader):
            inputs, labels = batch

            inputs = inputs.squeeze(0).unsqueeze(1).to("cuda", config.dtype)
            labels = labels.squeeze(0).to("cuda", config.dtype)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss_total += loss.item()

            all_preds.extend(outputs.sigmoid().cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return test_loss_total / len(dataloader), all_preds, all_labels

def model_size(model):
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    return sum([np.prod(p.size()) for p in model_parameters])

In [7]:
import torch

class Config:
    # === General ===
    model_name = "Tiny-CNN-Overfitter"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float32
    save_path = f"trained_models\\{model_name}\\"
    seed = 42

    # === Training ===
    num_epochs = 30
    batch_size = 24
    learning_rate = 1e-4
    weight_decay = 1e-4
    warmup_percent = 0.15
    max_grad_norm = 1.0
    log_every = 10  # steps between logs (optional)
    save_checkpoints = True

    # === Dataset ===
    use_masks = True
    num_workers = 4
    val_split = 0.2
    shuffle = True

In [8]:
model = TinyCNN(num_classes=50)
print(f"{model_size(model)} Parameters")
train(model, train_dataloader, Config)

6317938 Parameters


100%|██████████| 206/206 [00:01<00:00, 185.00it/s]


Epoch 1, Train Loss: 0.0588


RuntimeError: Parent directory .\trained_models\Tiny-CNN-Overfitter\ does not exist.

In [None]:
criterion = nn.BCEWithLogitsLoss()
test_loss_average, all_probs, all_labels = evaluate(model, train_dataloader, criterion, Config)

all_p_tensor = torch.stack([torch.tensor(x) for x in all_probs], dim=0).float()
all_l_tensor = torch.stack([torch.tensor(x) for x in all_labels], dim=0).int()
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
import numpy as np


def graph(probs, labels):
    # Store per-threshold values
    thresholds = np.linspace(0, 1, 200)
    precision_all = []
    recall_all = []

    for class_idx in range(probs.shape[1]):
        y_true = labels[:, class_idx]
        y_score = probs[:, class_idx]

        precision, recall, thresh = precision_recall_curve(y_true, y_score)

        # Interpolate to get precision/recall at uniform thresholds
        interp_precision = np.interp(thresholds, thresh, precision[:-1])  # precision[:-1] because it's len(thresh)+1
        interp_recall = np.interp(thresholds, thresh, recall[:-1])

        precision_all.append(interp_precision)
        recall_all.append(interp_recall)

    # Average across all classes
    precision_mean = np.mean(precision_all, axis=0)
    recall_mean = np.mean(recall_all, axis=0)

    # Plot
    plt.figure(figsize=(8, 6))
    plt.plot(thresholds, precision_mean, label='Precision')
    plt.plot(thresholds, recall_mean, label='Recall')
    plt.xlabel("Threshold")
    plt.ylabel("Score")
    plt.title("Precision and Recall vs Threshold (Macro-Averaged over Genres)")
    plt.legend()
    plt.grid(True)
    plt.show()


graph(all_p_tensor, all_l_tensor)