In [2]:
%load_ext autoreload
%autoreload 2

# System imports
import sys
import os

# Make local packages importable: "../.." resolves to the directory two levels
# above the current working directory, which is appended to sys.path once.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
import torch
import pickle
from pathlib import Path
from solver import Solver
from griddy.griddy_tuna import hit_griddy, SearchMethod
from models.CRNN import CRNNModel
from data.data_loader import MirDataProcessor, ChordDataProcessor
from utils.model_utils import get_device
from solver import TrialMetric

In [None]:
# Resolve the compute device through utils.model_utils.get_device and echo it.
# `device` is handed to the data processors and to the Solver config in later cells.
device = get_device()
print(f"Device is {device}")

In [None]:
# If you have already run the downloader, leave download set to False
download = False

# Reprocess for a different dataset type while bypassing the download
reprocess = False

# Which label set to build loaders for: 'combined', 'root' or 'chord_class'.
# Kept in one variable so the summary printed below always names the set that
# was actually loaded.
dataset_name = 'root'

# Download and build useable train/test data out of the MIR Billboard dataset.
# Your notebook should be in its own directory to begin with; this should create
# the "data" folder inside that.
data_processer = MirDataProcessor(
    output_dir='data',
    download=download,
    batch_size=256,
    process_sequential=True,
    seq_length=8,
    overlap_sequence=True,
    use_median=True,
)
if download:
    # You may need to reprocess the downloaded data into sequential or tabular
    # form depending on your model.
    data_processer.process_billboard_data(log_fail_only=False)
if reprocess:
    data_processer.dataset.download(partial_download=['metadata'])
    # combined_notation=True is standard billboard notation (C:maj); setting it to
    # False creates separate CSVs for root and chord_class.
    data_processer.process_billboard_data(combined_notation=True, chord_vocab='majmin7inv', log_fail_only=False)

# Set nrows to an integer (e.g. 2000) to shrink the dataset for quick testing.
train_loader, test_loader, num_classes = data_processer.build_data_loaders(
    device=device,
    dataset=dataset_name,
    nrows=None,
)

print(f"Number of {dataset_name} classes: {num_classes}")

In [None]:
# Build chord train/test loaders from spectrogram features, then pickle them so
# the load cell further down can restore them.
training_data_processor_spectro = ChordDataProcessor(
    device=device,
    #process_sequential=True
)

chord_train_loader, chord_test_loader, chord_num_classes = training_data_processor_spectro.process_all_and_build_loaders(
    chord_json_path="chord_ref.json",
    notation="chord_class",
    mode="spectrogram",
    jsontype="keyed",
    audio_path="wav",  # alternative: "timbral_bias_datasets/train/processed"
    batch_size=256,
    test_size=0.2,
    random_state=42,
)

data_directory = 'data'
# open(..., "wb") does not create missing folders, so make sure the target
# directory exists before writing into it.
os.makedirs(data_directory, exist_ok=True)

# NOTE: the chroma cell writes the same three file names, so whichever of the
# two cells ran last determines what the load cell restores.
for file_name, payload in (
    ("chord_train_loader.pkl", chord_train_loader),    # training data loader
    ("chord_test_loader.pkl", chord_test_loader),      # testing data loader
    ("chord_num_classes.pkl", chord_num_classes),      # number of classes
):
    with open(os.path.join(data_directory, file_name), "wb") as pickle_file:
        pickle.dump(payload, pickle_file)

print("Data loaders and number of classes saved successfully.")


In [None]:
# Build chord train/test loaders from chroma features, then pickle them so the
# load cell further down can restore them.
training_data_processor = ChordDataProcessor(
    device=device,
    #process_sequential=True
)

chord_train_loader, chord_test_loader, chord_num_classes = training_data_processor.process_all_and_build_loaders(
    chord_json_path="chord_ref.json",
    notation="chord_class",
    mode="chroma",
    jsontype="keyed",
    audio_path="wav",  # alternative: "timbral_bias_datasets/train/processed"
    batch_size=256,
    test_size=0.2,
    random_state=42,
)

data_directory = 'data'
# open(..., "wb") does not create missing folders, so make sure the target
# directory exists before writing into it.
os.makedirs(data_directory, exist_ok=True)

# NOTE: the spectrogram cell writes the same three file names, so whichever of
# the two cells ran last determines what the load cell restores.
for file_name, payload in (
    ("chord_train_loader.pkl", chord_train_loader),    # training data loader
    ("chord_test_loader.pkl", chord_test_loader),      # testing data loader
    ("chord_num_classes.pkl", chord_num_classes),      # number of classes
):
    with open(os.path.join(data_directory, file_name), "wb") as pickle_file:
        pickle.dump(payload, pickle_file)

print("Data loaders and number of classes saved successfully.")


In [None]:
# Folder holding the cached pickles. It is set here as well so this cell runs on
# a fresh kernel without first executing the processing cells that also define it.
data_directory = 'data'

# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were written by the save cells of this notebook.

# Load the training data loader
with open(os.path.join(data_directory, "chord_train_loader.pkl"), "rb") as train_file:
    chord_train_loader = pickle.load(train_file)

# Load the testing data loader
with open(os.path.join(data_directory, "chord_test_loader.pkl"), "rb") as test_file:
    chord_test_loader = pickle.load(test_file)

# Load the number of classes
with open(os.path.join(data_directory, "chord_num_classes.pkl"), "rb") as classes_file:
    chord_num_classes = pickle.load(classes_file)

print("Data loaders and number of classes loaded successfully.")

In [8]:
# NOTE: Do not assume these values are anything but trash, they're just here for testing
#
# Each *_PARAMS dict below has a single entry: the key is the class to construct
# and the value is the dict of keyword arguments for it. The manual-training cell
# instantiates them in the order model -> optimizer -> scheduler -> criterion ->
# solver, and PARAM_SET bundles all five for hit_griddy(param_set=...).

# Keyword arguments for Solver; the model/optimizer/scheduler/criterion objects
# are merged in when the Solver is built.
SOLVER_PARAMS = {
    Solver : {
        "device": device,
        "batch_size": 256,
        "epochs": 20,
        "early_stop_epochs": 5, # early stop after n epochs without improvement, 0 to disable
        "warmup_epochs": 0, # 0 to disable
        "dtype": "float16",
        "train_dataloader": chord_train_loader, # must be DataLoader object
        "valid_dataloader": chord_test_loader, # must be DataLoader object
    }
}

# Keyword arguments for the CRNN model.
MODEL_PARAMS = {
    CRNNModel: {
        # presumably the 12 chroma bins produced with mode="chroma" — TODO confirm
        # against the feature dimension of the loaders actually in use
        "input_features": 12,
        "num_classes": chord_num_classes,
        "hidden_size": 512,
        "cnn_params": {
            "n_blocks": 1,
            "block_depth": 3,
            "pad": 1,
            "stride": 1,
            "k_conv": 3,
            "dropout": 0.2,
            "out_channels": 64
        }
    }
}

# Optimizer keyword arguments; 'params' is supplied from the model at build time.
OPTIM_PARAMS = {
    torch.optim.Adam : {
        "lr": 0.001,
    }
}

# Scheduler keyword arguments; 'optimizer' is supplied at build time.
SCHED_PARAMS = {
    torch.optim.lr_scheduler.ReduceLROnPlateau : {
        "patience": 3,
        # NOTE(review): recent PyTorch releases deprecate `verbose` on LR
        # schedulers — confirm against the installed version
        "verbose": True,
    }
}

# Loss function; constructed with no arguments.
CRITERION_PARAMS = {
    torch.nn.CrossEntropyLoss : {}
}

# Bundle of all five specs, passed to hit_griddy as param_set.
PARAM_SET = {
    "solver": SOLVER_PARAMS,
    "model" : MODEL_PARAMS,
    "optim" : OPTIM_PARAMS,
    "sched" : SCHED_PARAMS,
    "criterion" : CRITERION_PARAMS,
}

In [None]:
# Each *_PARAMS dict maps one class to its keyword arguments; pull out that
# first (class, kwargs) pair and instantiate it, wiring in runtime dependencies.
model_cls, model_kwargs = list(MODEL_PARAMS.items())[0]
model = model_cls(**model_kwargs)

# The optimizer additionally needs the parameters of the freshly built model.
optim_cls, optim_kwargs = list(OPTIM_PARAMS.items())[0]
optimizer = optim_cls(**(optim_kwargs | {'params': model.parameters()}))

# The scheduler wraps the optimizer created above.
sched_cls, sched_kwargs = list(SCHED_PARAMS.items())[0]
scheduler = sched_cls(**(sched_kwargs | {'optimizer': optimizer}))

criterion_cls, criterion_kwargs = list(CRITERION_PARAMS.items())[0]
criterion = criterion_cls(**criterion_kwargs)

# Combine the static Solver settings with the objects built in this cell.
solver_components = {
    'model': model,
    'optimizer': optimizer,
    'scheduler': scheduler,
    'criterion': criterion,
}
solver = Solver(**(SOLVER_PARAMS[Solver] | solver_components))

solver.train_and_evaluate(plot_results=True)

In [22]:
import pandas as pd
import pickle

def save_history(solver, filename='history.pkl'):
    """Pickle a solver's accuracy/loss histories as a pandas DataFrame.

    Args:
        solver: Object exposing ``train_accuracy_history``,
            ``valid_accuracy_history``, ``train_loss_history`` and
            ``valid_loss_history``.
        filename: Destination path of the pickle file.
    """
    # (DataFrame column, solver attribute) pairs, in output column order.
    column_sources = (
        ('Train Accuracy', 'train_accuracy_history'),
        ('Validation Accuracy', 'valid_accuracy_history'),
        ('Train Loss', 'train_loss_history'),
        ('Validation Loss', 'valid_loss_history'),
    )
    frame = pd.DataFrame(
        {column: getattr(solver, attribute) for column, attribute in column_sources}
    )

    # Serialize the assembled table to disk.
    with open(filename, 'wb') as sink:
        pickle.dump(frame, sink)

    print(f'History saved to {filename}')

def load_history(self, filename='history.pkl'):
    """Restore accuracy/loss histories from a pickled DataFrame onto ``self``.

    Args:
        self: Target object (for example a solver) that receives the four
            ``*_history`` attributes as plain lists.
        filename: Path of the pickle file to read.
    """
    # NOTE: unpickling can run arbitrary code; only read files you trust.
    with open(filename, 'rb') as source:
        frame = pickle.load(source)

    # Copy every column back onto its matching attribute as a list.
    for column, attribute in (
        ('Train Accuracy', 'train_accuracy_history'),
        ('Validation Accuracy', 'valid_accuracy_history'),
        ('Train Loss', 'train_loss_history'),
        ('Validation Loss', 'valid_loss_history'),
    ):
        setattr(self, attribute, frame[column].tolist())

    print(f'History loaded from {filename}')

In [None]:
# Persist the histories of the solver trained above for later comparison.
save_history(solver, filename='CRNN_history_class.pkl')

In [None]:
# Study name and the folder handed to hit_griddy as out_dir.
my_study = "crnn"
output_folder = Path("griddy")

# NOTE: modest values of n_trials and n_jobs set here for testing, set your values accordingly
hit_griddy(
    my_study,
    param_set=PARAM_SET,
    out_dir=output_folder,
    n_trials=2,
    n_jobs=2,
    prune=False,
    resume=False,
    trial_metric=TrialMetric.LOSS,
)