In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

import json
import numpy as np
import pandas as pd
from tqdm import tqdm

import os

In [2]:
# Read the training CSV into memory; the bare trailing expression makes the
# notebook display the frame's (n_rows, n_columns) tuple.
df = pd.read_csv(filepath_or_buffer="train_data/train_x.csv")
df.shape

(2907214, 13)

In [None]:
# Device selection: use Apple's Metal Performance Shaders ("mps") backend when
# PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using Apple Metal GPU." if device.type == "mps" else "Using CPU.")

Dataset

In [None]:
from scipy.ndimage import gaussian_filter1d


def gaussian_smoothing(chroma_features, sigma=1):
    """
    Apply Gaussian smoothing to chroma features along the sample (row) axis.

    Every feature column is filtered independently with the same 1-D Gaussian
    kernel. The input is never modified; a new array is returned.

    :param chroma_features: array-like of shape (num_samples, num_features)
    :param sigma: standard deviation for the Gaussian kernel
    :return: Smoothed chroma features as a numpy array of the same shape.
             Floating-point dtypes are preserved; any other dtype is promoted
             to float64 so the smoothed values are not truncated.
    :raises ValueError: if ``chroma_features`` is not 2-dimensional
    """
    features = np.asarray(chroma_features)
    if features.ndim != 2:
        raise ValueError(
            "chroma_features must be 2-D (num_samples, num_features), "
            f"got array with shape {features.shape}"
        )

    # gaussian_filter1d writes its result in the input's dtype by default, so
    # integer input would have its smoothed values silently cut to integers.
    if not np.issubdtype(features.dtype, np.floating):
        features = features.astype(np.float64)

    # Nothing to filter; still hand back a fresh array, never the input itself.
    if features.size == 0:
        return features.copy()

    # axis=0 filters down each column in one call, replacing the per-column loop.
    return gaussian_filter1d(features, sigma=sigma, axis=0)


class Music_Dataset(Dataset):
    """Sliding-window dataset over a CSV of per-row chroma features and labels.

    Each item is a window of ``window_size`` consecutive (smoothed) feature rows
    paired with the label of the row in the middle of that window.
    """

    def __init__(self, csv_path, window_size, step_size):
        """
        Args:
            csv_path (str): Path to the CSV file containing the chroma features and labels.
                The last column is used as the label, all preceding columns as features.
            window_size (int): The length of each sequence (number of chroma features in a window).
            step_size (int): The number of steps the window moves (controls overlap).

        Raises:
            ValueError: If ``window_size`` or ``step_size`` is not positive.
        """
        if window_size <= 0 or step_size <= 0:
            raise ValueError(
                f"window_size and step_size must be positive, got {window_size} and {step_size}"
            )

        self.data = pd.read_csv(csv_path)

        # process csv file content
        # NOTE(review): the previous code used ``iloc[::-1]`` for both arrays,
        # which reverses the row order and keeps every column, so features and
        # labels were the same matrix. That is assumed to be a typo for column
        # slicing -- confirm that the label really is the last CSV column.
        self.chroma_features = self.data.iloc[:, :-1].values
        # apply smoothing (to the features only; labels stay untouched)
        self.chroma_features = gaussian_smoothing(self.chroma_features, sigma=1)
        self.labels = self.data.iloc[:, -1].values

        # parameters
        self.window_size = window_size
        self.step_size = step_size

    def __len__(self):
        """Return the number of complete windows that fit in the data."""
        num_rows = len(self.chroma_features)
        # Fewer rows than one window: report 0 rather than a negative count,
        # which would make ``len()`` raise.
        if num_rows < self.window_size:
            return 0
        return (num_rows - self.window_size) // self.step_size + 1

    def __getitem__(self, idx):
        """Return ``(window_chroma, window_label)`` for window number ``idx``.

        ``window_chroma`` is a float32 tensor holding ``window_size`` feature
        rows; ``window_label`` is a long tensor with the label of the middle row.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        num_windows = len(self)
        if idx < 0:
            idx += num_windows
        # An explicit bounds check prevents silently returning a short trailing
        # window and lets plain ``for item in dataset`` iteration terminate.
        if not 0 <= idx < num_windows:
            raise IndexError(f"window index out of range for dataset of {num_windows} windows")

        # find window start and end index
        start_idx = idx * self.step_size
        end_idx = start_idx + self.window_size
        mid_idx = start_idx + (self.window_size // 2)

        # get both the chroma features as well as the label
        window_chroma = self.chroma_features[start_idx:end_idx]
        window_label = self.labels[mid_idx]

        # convert to tensors
        window_chroma = torch.tensor(window_chroma, dtype=torch.float32)
        window_label = torch.tensor(window_label, dtype=torch.long)

        return window_chroma, window_label

Dataloader