# Imports

In [1]:
import os

import numpy as np
import tensorflow as tf

import wandb
from wandb.keras import WandbCallback

import config
import utils
import music_model

2022-12-23 13:38:41.155530: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-23 13:38:41.281051: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-12-23 13:38:41.313595: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-12-23 13:38:41.954498: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

# Setup

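Set the following variables before training; they select the model variant, the logging, the hardware, and the dataset used by the rest of the notebook.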

In [2]:
# --- Naming ---
# Suffix concatenated to the generated model name (leave empty for none).
ADDITIONAL_MODEL_NAME = ''
# Run name shown on Wandb; if left empty Wandb will pick a random name.
LOG_NAME = ''

# --- Model variant ---
MODEL_TYPE = 'GPT'   # "GPT" or "XL"
USE_MASK = False     # forwarded to create_model as use_masking_layers
USE_REG = True       # forwarded to create_model as use_regularization

# --- Logging / hardware ---
USE_WANDB = True
USE_ONE_GPU = True   # or False if another GPU is available

# --- Data ---
# True: restrict to the small genre subset; False: use the dataset with the full genre subset list.
USE_SMALL_GENRE_SET = True
DATASET_NAME = 'tf_data7dict'  # or whatever (used as a key into conf.dataset_paths)

In [3]:
# Name the experiment after which of the two optional features are enabled:
# regularization only, masking only, both, or neither (baseline).
if USE_REG:
    MODEL_NAME = 'reg_and_mask' if USE_MASK else 'reg_only'
else:
    MODEL_NAME = 'mask_only' if USE_MASK else 'baseline'

In [4]:
# The project root is the parent of the notebook's current working directory.
ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
conf = config.Config(config_string="single_instruments_type", 
                     root_path=ROOT_PATH, 
                     model_type=MODEL_TYPE,
                     model_name=f'model_{MODEL_TYPE}_{MODEL_NAME + ADDITIONAL_MODEL_NAME}')

# Config object has by default the full list of accepted subgenres and works on multi-gpus
# If we use the small dataset
if USE_SMALL_GENRE_SET:
    conf.accepted_subgenres = ['folk', 'nes', 'maestro']

# If we need to use only the first GPU
if USE_ONE_GPU:
    # Guard the lookup: indexing [0] on an empty list would raise IndexError on a
    # CPU-only machine. With no GPU present conf.GPUS is left at its Config default.
    available_gpus = tf.config.list_physical_devices('GPU')
    if available_gpus:
        conf.GPUS = available_gpus[0]
    # NOTE(review): the log emitted by this cell shows a MirroredStrategy spanning
    # GPU:0 and GPU:1, so these overrides presumably do not stop TensorFlow from
    # initialising the second GPU - confirm whether device visibility should be
    # restricted before Config is built.
    conf.BATCH_SIZE = 4
    conf.GLOBAL_BATCH_SIZE = conf.BATCH_SIZE  # one device: global batch == per-device batch
    conf.num_devices = 1

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


2022-12-23 13:39:06.966273: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-23 13:39:08.187136: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30503 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:3b:00.0, compute capability: 7.0
2022-12-23 13:39:08.187712: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 646 MB memory:  -> device: 1, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:86:00.0, compute capability: 7.0


# Training

## Model creation

In [5]:
def _build_model():
    """Create the model from `conf` using the notebook's masking/regularization flags."""
    return music_model.create_model(conf,
                                    use_masking_layers=USE_MASK,
                                    use_regularization=USE_REG)


# With more than one device the model has to be created inside the
# distribution strategy's scope; otherwise it is created directly.
if not conf.num_devices > 1:
    print("Using single GPU/CPU device")
    model = _build_model()
else:
    print("Using multiple GPUs with Mirrored Strategy")
    with conf.training_strategy.scope():
        model = _build_model()

Using single GPU/CPU device


## Wandb setup

In [6]:
# Start a Wandb run (only when logging is enabled) and record the hyper-parameters
# of this experiment as the run config.
if USE_WANDB:
    wandb_config = {
        # Notebook-level switches that are passed to Config / create_model. They are
        # logged explicitly so a run can be identified without relying on its group name.
        'model_type': MODEL_TYPE,
        'masking_layers': USE_MASK,
        'regularization': USE_REG,
        # Values read from the Config object.
        'gpus': conf.num_devices,
        'dataset': DATASET_NAME,
        'genres': conf.accepted_subgenres,
        'embedding_size': conf.SINGLE_EMB_SIZE,
        'batch_size': conf.BATCH_SIZE,
        'global_batch_size': conf.GLOBAL_BATCH_SIZE,
        'reg_loss_scale': conf.REG_LOSS_SCALE,
        'masking': conf.USE_MASKING,
        'dropout_prob': conf.DROPOUT_VALUE,
        'seq_len': conf.SEQ_LEN,
        'token_dim': conf.TOKEN_DIM,
        'genre_dim': conf.GENRE_DIM,
        'attn_heads': conf.ATTENTION_HEADS,
        'attn_blocks': conf.ATTENTION_BLOCKS,
    }

    # Architecture-specific entries.
    if MODEL_TYPE == 'GPT':
        wandb_config['activation_func'] = conf.DECODER_ACTIVATION_FUNCTION
    elif MODEL_TYPE == 'XL':
        wandb_config['sequence_blocks'] = conf.DIV_VAL
        wandb_config['head_dim']  = conf.HEAD_DIM
        wandb_config['inner_dim'] = conf.INNER_DIM
        wandb_config['memory_length'] = conf.MEMORY_LEN

    # An empty LOG_NAME is passed as None so that Wandb chooses the run name.
    run = wandb.init(project="Music Generation", entity="marcello-e-federico",
                     group=MODEL_NAME, job_type='train', config=wandb_config,
                     name=LOG_NAME if LOG_NAME != '' else None)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvolpepe[0m ([33mmarcello-e-federico[0m). Use [1m`wandb login --relogin`[0m to force relogin


## Loading dataset

In [7]:
# Look up the path registered for the selected dataset and split it into
# training / validation / test datasets.
dataset_path = conf.dataset_paths[DATASET_NAME]
(train_dataset,
 val_dataset,
 test_dataset) = utils.get_dataset_splits(dataset_path, conf)

## Training

In [8]:
# Work on a copy of the configured callbacks: appending to conf.MODEL_CALLBACKS
# itself would mutate the config, so every re-run of this cell would stack one
# more WandbCallback on the same list.
callbacks = list(conf.MODEL_CALLBACKS or [])
if USE_WANDB:
    # Log metrics and weights to Wandb; model and graph saving are turned off.
    callbacks.append(WandbCallback(
        save_model=False, save_graph=False,
        log_weights=True
    ))

In [None]:
# Train on the training split, evaluating on the validation split,
# with the callbacks assembled in the previous cell.
model.fit(train_dataset,
          validation_data=val_dataset,
          epochs=100,
          callbacks=callbacks,
          # initial_epoch=initial_epoch,  # change if resuming from previous checkpoint
          )

Epoch 1/100
Epoch 2/100
 184/1394 [==>...........................] - ETA: 22:46 - loss: 3.5588 - type_loss: 0.0106 - measure_loss: 0.1354 - beat_loss: 0.1085 - position_loss: 0.0094 - duration_loss: 0.1359 - pitch_loss: 1.0177 - instrument_loss: 0.3280 - velocity_loss: 0.6115 - key_sign_loss: 5.9348e-04 - time_sign_loss: 0.0107 - tempo_loss: 0.0078 - regularization_loss: 1.1827

In [None]:
# Close the Wandb run opened in the Wandb setup cell; `run` is only defined
# when USE_WANDB is enabled, hence the same guard here.
if USE_WANDB:
    run.finish()