In [None]:
%load_ext autoreload
%autoreload 2

# System imports
import sys
import os

# Put the parent of the current working directory (the parent of 'notebooks')
# on sys.path, presumably so the project-local imports that follow (`data`,
# `utils`) can be resolved. The membership check keeps repeated runs of this
# cell from appending the same entry again.
parent_dir = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir)  # os.pardir == "..": one level up
)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from data.data_loader import MirDataProcessor
from utils.model_utils import get_device

In [None]:
# Ask the project helper (utils.model_utils.get_device) for the device to use
# and keep it in `device`; the data-loader cells further down pass it on as
# build_data_loaders(device=device, ...).
# NOTE(review): presumably a torch device (CPU/GPU) — confirm in utils.model_utils.
device = get_device()
# Echo the choice so the reader can see which device this run is using.
print(f"Device is {device}")

### Example for `majmin7inv` using separate `root` and `chord_class` datasets

In [None]:
# chord_vocab options: 'full', 'majmin', 'majmininv', 'majmin7', 'majmin7inv'

In [None]:
# --- Run switches ------------------------------------------------------------
# download:  leave as False if the downloader has already been run.
# reprocess: True rebuilds the processed data for a different dataset type
#            while bypassing the full download.
download = False
reprocess = True

# Download and build usable train/test data out of the MIR Billboard dataset.
# Per the original author's note: the notebook should be in its own directory
# to begin with, and this should create the "data" folder inside it.
data_processer = MirDataProcessor(
    output_dir=None,
    download=download,
    batch_size=64,
)

if download:
    # Processing pass with default options after a download. You may need to
    # reprocess the downloaded data into sequential or tabular form depending
    # on your model.
    data_processer.process_billboard_data(log_fail_only=False)

if reprocess:
    # Fetch only the 'metadata' part of the dataset, then process with explicit
    # options. Combined notation is the standard Billboard notation (C:maj);
    # combined_notation=False creates separate CSVs for root and chord_class.
    data_processer.dataset.download(partial_download=['metadata'])
    data_processer.process_billboard_data(
        combined_notation=False,
        chord_vocab='majmin7inv',
        log_fail_only=False,
    )
# NOTE(review): if both switches are True, process_billboard_data runs twice
# (once with defaults, once with the explicit options) — confirm that is intended.

# dataset options: 'combined', 'root', 'chord_class'
# Set nrows to a number to shrink the dataset for testing.
root_train_loader, root_test_loader, root_num_classes = data_processer.build_data_loaders(
    device=device,
    dataset='root',
    nrows=None,
)
chord_train_loader, chord_test_loader, chord_num_classes = data_processer.build_data_loaders(
    device=device,
    dataset='chord_class',
    nrows=None,
)

print(f"Number of root classes: {root_num_classes}")
print(f"Number of chord classes: {chord_num_classes}")

### Example for majmin7inv using regular combined notation

In [None]:
# --- Run switches ------------------------------------------------------------
# download:  leave as False if the downloader has already been run.
# reprocess: True rebuilds the processed data for a different dataset type
#            while bypassing the full download.
download = False
reprocess = True

# Download and build usable train/test data out of the MIR Billboard dataset.
# Per the original author's note: the notebook should be in its own directory
# to begin with, and this should create the "data" folder inside it.
data_processer = MirDataProcessor(
    output_dir=None,
    download=download,
    batch_size=64,
)

if download:
    # Processing pass with default options after a download. You may need to
    # reprocess the downloaded data into sequential or tabular form depending
    # on your model.
    data_processer.process_billboard_data(log_fail_only=False)

if reprocess:
    # Fetch only the 'metadata' part of the dataset, then process with explicit
    # options. Combined notation is the standard Billboard notation (C:maj);
    # setting combined_notation=False would instead create separate CSVs for
    # root and chord_class.
    data_processer.dataset.download(partial_download=['metadata'])
    data_processer.process_billboard_data(
        combined_notation=True,
        chord_vocab='majmin7inv',
        log_fail_only=False,
    )
# NOTE(review): if both switches are True, process_billboard_data runs twice
# (once with defaults, once with the explicit options) — confirm that is intended.

# dataset options: 'combined', 'root', 'chord_class'
# Set nrows to a number to shrink the dataset for testing.
train_loader, test_loader, num_classes = data_processer.build_data_loaders(
    device=device,
    dataset='combined',
    nrows=None,
)

print(f"Number of classes: {num_classes}")