## Imports and setup

In [None]:
# Create or update the project's conda environment from ../environment.yaml.
# The leading `!` hands the line to the system shell, so `conda` must be on PATH there.
# First-time setup (creates the environment instead of updating it):
# !conda env create -f ../environment.yaml
!conda env update -f ../environment.yaml
# To delete the environment and start from scratch:
# !conda remove --name amadeus-ex-machina --all

In [None]:
# Check that the kernel is running inside the project's conda environment.
#
# `!conda init` / `!conda activate amadeus-ex-machina` cannot do this from a cell:
# every `!` command runs in its own short-lived subshell, so an activation there
# never reaches the already-running kernel process, and `conda init` rewrites the
# user's shell startup files as a side effect. The environment has to be chosen
# when the kernel starts (launch Jupyter from the activated environment, or pick
# the environment's kernel in the kernel selector).
import os
import sys

EXPECTED_CONDA_ENV = "amadeus-ex-machina"

# CONDA_DEFAULT_ENV is set by `conda activate`; the interpreter prefix covers
# kernels started directly from the environment's Python without activation.
kernel_env_names = {os.environ.get("CONDA_DEFAULT_ENV"), os.path.basename(sys.prefix)}

if EXPECTED_CONDA_ENV in kernel_env_names:
    print(f"Kernel is running in conda environment '{EXPECTED_CONDA_ENV}'")
else:
    # Warn instead of raising so the notebook stays usable with other setups.
    print(
        f"WARNING: kernel interpreter is {sys.executable}, which does not look like the "
        f"'{EXPECTED_CONDA_ENV}' conda environment. Restart the notebook with that kernel."
    )

In [None]:

# System imports
import sys
import os

# Make the directory above the current working directory importable, so the
# project packages next to 'notebooks' can be imported from this notebook.
one_level_up = os.path.join(os.getcwd(), "..")
parent_dir = os.path.abspath(one_level_up)

# Register the path only once so re-running the cell does not add duplicates.
already_importable = parent_dir in sys.path
if not already_importable:
    sys.path.append(parent_dir)

# Class/model imports
from data.data_loader import MirDataProcessor
from utils.model_utils import get_device
from solver import Solver
import data.youtube_download as youtube_download
from pathlib import Path
from solver import Solver
from griddy.griddy_tuna import hit_griddy, SearchMethod

# Import models
from models.griddy_model import GriddyModel
from models.CRNN import CRNNModel
from models.CNN import CNNModel
from models.RNN import RNNModel
from models.AudioDataset import ChordDataset
from models.mlp_chord_classifier import MLPChordClassifier

# Package imports
import torch

# Select the compute device once for the whole notebook.
# get_device() is imported from utils.model_utils; which backend it picks is
# decided there. The result is reused below when the data loaders are built.
device = get_device()
print(f"Device is {device}")

## Download and process data

In [None]:
# Download the MIR Billboard dataset and build usable train/test data from it.
# The processor is bound under the correctly spelled name `data_processor`; the
# original misspelling `data_processer` is kept as an alias so cells written
# against either name work after a fresh Restart & Run All.
data_processor = MirDataProcessor(download=True, batch_size=64)
data_processer = data_processor
data_processor.process_data()

# Create data loaders for the train and test sets; the call also reports the
# number of target classes, which the model configurations below reuse.
train_loader, test_loader, num_classes = data_processor.build_data_loaders(device=device)

print(f"Number of classes: {num_classes}")

In [None]:
# Location of the McGill Billboard data, relative to the notebook's working directory.
billboard_data_path = "../data/raw/McGill-Billboard"

# Hand the directory to data.youtube_download.process_lab_files.
# NOTE(review): what is done with each .lab file (and whether this needs network
# access) is defined in that module, not visible here — confirm before running.
youtube_download.process_lab_files(billboard_data_path)

## Build and train models

In [None]:
# Search space handed to `hit_griddy`.
#
# Each section maps a class to the keyword values to try for it. Value lists follow
# the convention noted inline below: several plain values are searched as
# CATEGORICAL, a single-element list is a fixed value, and
# `[low, high, SearchMethod.X]` presumably describes a sampled range.
# NOTE(review): assumes Griddy forwards these keys as constructor keyword
# arguments — confirm against griddy.griddy_tuna.

SOLVER_PARAMS = {
    Solver: {
        # Reuse the device selected during setup (the data loaders were built with
        # it too) instead of hard-coding "cuda", which breaks on machines without
        # CUDA. str() keeps the value a plain string, as before.
        "device": str(device),
        "epochs": 10,
        "early_stop_epochs": 0,  # early stop after n epochs without improvement, 0 to disable
        "warmup_epochs": 0,  # 0 to disable
        "dtype": "float16",
        "train_dataloader": train_loader,  # assume a DataLoader object
        "valid_dataloader": test_loader,  # assume a DataLoader object
        "direction": "minimize",  # must specify this, even if not used by solver
    }
}

# Default model configuration. Study cells that define their own MODEL_PARAMS
# must pass it to hit_griddy explicitly: PARAM_SET below keeps a reference to
# THIS dict, and rebinding the MODEL_PARAMS name later does not update it.
MODEL_PARAMS = {
    CRNNModel: {
        "input_features": [24],
        "num_classes": [num_classes],
        "hidden_size": [128],
    }
}

OPTIM_PARAMS = {
    torch.optim.SGD: {
        "lr": [0.001, 0.1, SearchMethod.LOG_UNIFORM],
        "momentum": [0.9, 0.99, SearchMethod.UNIFORM],
        "weight_decay": [0.00001],
    },
    torch.optim.Adam: {
        "lr": [0.03, 0.02, 0.01, 0.1],  # this will auto-search as CATEGORICAL
        # Adam has no `momentum` argument (passing one raises TypeError); its
        # momentum-like coefficients are the `betas` tuple, left at the default here.
        "weight_decay": [0.00001],  # this won't be searched
    },
}

SCHED_PARAMS = {
    torch.optim.lr_scheduler.CosineAnnealingWarmRestarts: {
        # CosineAnnealingWarmRestarts takes `T_0` (epochs until the first restart);
        # `T_max` belongs to CosineAnnealingLR and is rejected by this class.
        "T_0": [10],
    },
    torch.optim.lr_scheduler.StepLR: {
        "step_size": [10, 20],
        "gamma": [0.1, 0.05],
    },
}

CRITERION_PARAMS = {
    torch.nn.CrossEntropyLoss: {}
}

# Bundle of all sections under the keys the study cells pass as `param_set`.
PARAM_SET = {
    "solver": SOLVER_PARAMS,
    "model": MODEL_PARAMS,
    "optim": OPTIM_PARAMS,
    "sched": SCHED_PARAMS,
    "criterion": CRITERION_PARAMS,
}

In [None]:
# Griddy with MLPChordClassifier

MODEL_PARAMS = {
    MLPChordClassifier: {
        "input_features": [24],
        "num_classes": [num_classes],
    }
}

study_name = "test12"
output_folder = Path(f"../results/{study_name}/griddy/")

# PARAM_SET still holds the model dict it was built with; rebinding MODEL_PARAMS
# above does not change it. Build a per-study copy carrying this cell's model
# entry so the search really tunes MLPChordClassifier.
study_param_set = {**PARAM_SET, "model": MODEL_PARAMS}

solver_reference = hit_griddy(
    study_name,
    param_set=study_param_set,
    out_dir=output_folder,
    n_trials=100,
    n_jobs=1,
    resume=False,
)

In [None]:
# Griddy with CRNN

MODEL_PARAMS = {
    CRNNModel: {
        "input_features": [24],
        "num_classes": [num_classes],
        "hidden_size": [128],
    }
}

study_name = "test11"
output_folder = Path(f"../results/{study_name}/griddy/")

# PARAM_SET still holds the model dict it was built with; rebinding MODEL_PARAMS
# above does not change it. Pass a per-study copy carrying this cell's model
# entry so the search uses the configuration written here.
study_param_set = {**PARAM_SET, "model": MODEL_PARAMS}

solver_reference = hit_griddy(
    study_name,
    param_set=study_param_set,
    out_dir=output_folder,
    n_trials=100,
    n_jobs=1,
    resume=False,
)

In [None]:
# Griddy with CNN

MODEL_PARAMS = {
    CNNModel: {
        "input_channels": [24],
        "num_classes": [num_classes],
    }
}

study_name = "test13"
output_folder = Path(f"../results/{study_name}/griddy/")

# PARAM_SET still holds the model dict it was built with; rebinding MODEL_PARAMS
# above does not change it. Build a per-study copy carrying this cell's model
# entry so the search really tunes CNNModel.
study_param_set = {**PARAM_SET, "model": MODEL_PARAMS}

solver_reference = hit_griddy(
    study_name,
    param_set=study_param_set,
    out_dir=output_folder,
    n_trials=100,
    n_jobs=1,
    resume=False,
)

In [None]:
# Griddy with RNN

MODEL_PARAMS = {
    RNNModel: {
        "input_size": [24],
        "hidden_size": [128],
        "output_size": [num_classes],
    }
}

study_name = "test14"
output_folder = Path(f"../results/{study_name}/griddy/")

# PARAM_SET still holds the model dict it was built with; rebinding MODEL_PARAMS
# above does not change it. Build a per-study copy carrying this cell's model
# entry so the search really tunes RNNModel.
study_param_set = {**PARAM_SET, "model": MODEL_PARAMS}

solver_reference = hit_griddy(
    study_name,
    param_set=study_param_set,
    out_dir=output_folder,
    n_trials=100,
    n_jobs=1,
    resume=False,
)

## Run inference

In [None]:
# Run inference on one Billboard track with the result of the last Griddy study.

# The data-processing cell binds the processor as `data_processer`; use that name
# so this cell works after a fresh Restart & Run All.
scaler = data_processer.scaler
label_encoder = data_processer.label_encoder

# Path relative to the notebook's working directory (same root as the Billboard
# data processed above) instead of a machine-specific absolute placeholder.
chroma_path = "../data/raw/McGill-Billboard/0003/bothchroma.csv"

# No `solver` variable is ever created in this notebook; the study cells store
# the return value of hit_griddy in `solver_reference`.
# NOTE(review): assumes hit_griddy returns the trained Solver (exposing
# run_inference) — confirm against griddy.griddy_tuna.
solver_reference.run_inference(
    chroma_path,
    scaler,
    label_encoder,
)