In [None]:
import os
import logging, os

# Keep the notebook output quiet. The environment variable is set in this first
# cell, i.e. before the cell that imports TensorFlow, and is the knob TensorFlow
# reads for its native (C++) log verbosity.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Suppress Python `logging` records of severity WARNING and below, process-wide.
logging.disable(level=logging.WARNING)

In [None]:
from matplotlib import pyplot as plt
from musicnet.utils import Track, get_train_ids, instruments_vocab, notes_vocab
import librosa
import numpy as np
import tensorflow as tf
from tensorflow import keras
from glob import glob
from musicnet.models.transformer.Transformer import TransformerLRSchedule, WarmupLRSchedule
from sklearn.metrics import precision_score, recall_score, f1_score
from tensorflow.nn import weighted_cross_entropy_with_logits
from musicnet.preprocessing.midi_to_wav.utils import MidiConvertedTrack, get_midi_train_ids
from musicnet.preprocessing.wav_specs_and_notes.utils import load_params, Preprocessor, create_tf_record_ds
import mido

In [None]:
# Show the TensorFlow version this notebook is running against.
tf.__version__

In [None]:
# List the GPU devices TensorFlow can see (an empty list means none are visible).
tf.config.list_physical_devices("GPU")

In [None]:
# Load the project parameters; later cells read the "preprocessor" and
# "instruments_whitelist" entries from this mapping.
params = load_params()

In [None]:
# Dump the loaded parameters so the run configuration is visible in the output.
print(params)

In [None]:
# When a non-empty whitelist is configured, replace the imported vocabulary with
# one that maps each whitelisted instrument to its position in the whitelist.
if params["instruments_whitelist"]:
    instruments_vocab = {
        instrument: index
        for index, instrument in enumerate(params["instruments_whitelist"])
    }

In [None]:
# Keyword arguments forwarded to the dataset builder. The number of target
# classes is the product of the instrument and note vocabulary sizes.
ds_params = dict(
    n_filters=params["preprocessor"]["n_filters"],
    target_classes=len(instruments_vocab) * len(notes_vocab),
    batch_size=8,
)

# Build the dataset for the "train" split.
train_ds = create_tf_record_ds("train", **ds_params)

In [None]:
# Peek at a single batch to check what the dataset yields.
for batch in train_ds:
    print(batch)
    break  # only the first batch is needed; `batch` stays bound afterwards

In [None]:
# Preprocess one MIDI-converted training track (the last id returned by
# `get_midi_train_ids`) so the following cells can inspect its chunks.
# NOTE(review): `id` shadows the builtin of the same name; the name is kept so
# any cell that reads `id` keeps working.
id = get_midi_train_ids()[-1]
track = MidiConvertedTrack(id)

# The call previously ended with a stray positional `ins` after the `**`
# unpacking. That is a SyntaxError (positional argument after keyword
# unpacking) and `ins` is not defined anywhere in this notebook, so the cell
# could never run.
# NOTE(review): if `Preprocessor` needs an extra argument (e.g. the instrument
# vocabulary), pass it by keyword here — confirm against its signature.
preprocessor = Preprocessor(**params["preprocessor"])
x_chunks, y_chunks = preprocessor.preprocess(track)

In [None]:
# Shapes of the two arrays returned by `preprocessor.preprocess(track)`.
print(x_chunks.shape, y_chunks.shape)

In [None]:
# Display the metadata of the selected track.
track.get_metadata()

In [None]:
# Display the path of the MIDI file behind the selected track.
track.get_midi_path()

In [None]:
# Read the notes of the selected track from its MIDI file.
notes = track.read_midi_notes()

In [None]:
# Number of entries returned by `read_midi_notes` (presumably one per note — confirm).
len(notes)

In [None]:
# mid = mido.MidiFile(track.get_midi_path())

# i = 0
# for msg in mid:
#     # if msg.type == "control_change" or msg.type == "program_change":
#     #     print(msg)
#     if msg.type == "note_on":
#         print(msg)

In [None]:
# Same call as the earlier `notes = track.read_midi_notes()` cell, bound to the
# name the inspection cells below use.
midi_notes = track.read_midi_notes()

In [None]:
# Preview the first and the last ten rows of the MIDI notes, the same way the
# CSV data is previewed. The first slice used to be `iloc[10:]`, which printed
# every row from index 10 onward (including the tail printed again below)
# instead of the first ten rows.
print(midi_notes.iloc[:10])
print(midi_notes.iloc[-10:])

In [None]:
# Load the CSV data of the selected track and preview both ends of it:
# the first ten rows, then the last ten rows.
csv_data = track.read_csv_data()

print(csv_data.head(10))
print(csv_data.tail(10))

In [None]:
# Sorted distinct values of the "instrument" column in the CSV data.
np.array(sorted(csv_data["instrument"].unique()))

In [None]:
# Sorted distinct values of the "program" column in the MIDI notes, for
# comparison with the CSV instruments listed above.
np.array(sorted(midi_notes["program"].unique()))

In [None]:
# print(csv_data["instrument"].unique())

In [None]:
# print(instruments_vocab)

In [None]:
# Re-print the chunk shapes right before plotting the first chunks.
print(x_chunks.shape, y_chunks.shape)

In [None]:
# Plot the first two preprocessed chunks: the input `x_chunks[i]` as a
# mel-scaled spectrogram on the top row and the matching `y_chunks[i]` as an
# image on the bottom row.

# `librosa.display` is a submodule; a plain `import librosa` does not load it
# on every librosa release, so import it explicitly before using `specshow`.
import librosa.display

n_examples = 2  # one column per plotted chunk

fig, axes = plt.subplots(2, n_examples, figsize=(20, 15), squeeze=False)

for i in range(n_examples):
    # Top row: spectrogram of chunk i (transposed, as in the original call).
    spec_ax = axes[0, i]
    mesh = librosa.display.specshow(
        x_chunks[i].T,
        sr=params["preprocessor"]["target_sr"],
        x_axis="time",
        y_axis="mel",
        hop_length=params["preprocessor"]["hop_length"],
        # NOTE(review): fmin/fmax are hard-coded here; confirm they match the
        # frequency range the Preprocessor actually uses.
        fmin=16,
        fmax=4096,
        ax=spec_ax,
    )
    fig.colorbar(mesh, ax=spec_ax, label="dB")
    spec_ax.set_title('Mel-Spectrogram (dB)', fontdict=dict(size=18))
    spec_ax.set_xlabel('Time', fontdict=dict(size=15))
    spec_ax.set_ylabel('Frequency', fontdict=dict(size=15))

    # Bottom row: the second preprocessing output for the same chunk.
    target_ax = axes[1, i]
    image = target_ax.imshow(y_chunks[i].T, aspect="auto")
    fig.colorbar(image, ax=target_ax)

In [None]:
# Sanity check on toy data: the macro-averaged Keras F1 metric is printed next
# to the mean of the per-class F1 scores computed with scikit-learn, so the two
# can be compared. Both arrays are (2 sequences, 4 steps, 4 classes).
y_pred = np.array([
    # First sequence
    [[0.7, 0, 0, 0],
     [0.7, 0, 0.7, 0.7],
     [0, 0, 0, 0],
     [0, 0.7, 0, 0]],
    # Second sequence
    [[0.7, 0.7, 0.7, 0.7],
     [0, 0, 0, 0],
     [0.7, 0.7, 0.7, 0.7],
     [0, 0, 0, 0]],
])

y_true = np.array([
    # First sequence
    [[1, 0, 0, 0],
     [1, 1, 1, 0],
     [1, 0, 0, 0],
     [1, 1, 0, 1]],
    # Second sequence
    [[0, 1, 1, 1],
     [0, 0, 0, 1],
     [0, 0, 1, 1],
     [0, 0, 0, 1]],
])

# Merge the sequence and step axes so every row is one sample of 4 class scores.
flat_true = y_true.reshape(-1, 4)
flat_pred = y_pred.reshape(-1, 4)

f1 = keras.metrics.F1Score(threshold=0.5, average="macro")
f1.update_state(flat_true, flat_pred)

f1s = []
for c in range(4):
    # Score each class on its own column, binarised with the same 0.5 cut-off.
    true_c = flat_true[:, c]
    hits_c = flat_pred[:, c] > 0.5
    class_scores = {
        "precision": precision_score(true_c, hits_c),
        "recall": recall_score(true_c, hits_c),
        "f1": f1_score(true_c, hits_c),
    }
    f1s.append(class_scores["f1"])
    print(c, class_scores)

print("Final result: ", f1.result())
print("Avg f1:", np.mean(f1s))

In [None]:
# Sanity check on toy data: normalising by hand with statistics taken over the
# flattened (batch * step) axis is printed next to the output of a Keras
# BatchNormalization layer run in training mode, so the two can be compared.
inputs = np.array(
    [
        [[10, 0.1], [20, 0.05], [40, 0.1], [20, 0.2], [10, 0.1]],
        [[15, 0.05], [25, 0.03], [40, 0.1], [20, 0.2], [35, 0.15]],
    ],
    dtype=np.float64,
)
bn = keras.layers.BatchNormalization(epsilon=1e-5)

# Per-feature statistics over every (batch, step) position.
flat_inputs = inputs.reshape(-1, inputs.shape[-1])
batch_mean = flat_inputs.mean(axis=0)
batch_std = flat_inputs.std(axis=0)

print("Batch mean", batch_mean)
print("Batch std", batch_std)
print(
    "Numpy calc (batch dim flatten)",
    np.round((inputs - batch_mean) / batch_std, 2)
)
print(
    "BatchNorm layer",
    np.round(bn(inputs, training=True).numpy(), 2)
)

# Print the layer's first four weights after the forward pass
# (presumably gamma, beta, moving mean, moving variance — confirm in the Keras docs).
for weight_index in range(4):
    print(bn.weights[weight_index].value)

In [None]:
# Plot both learning-rate schedules over the first 40000 training steps.
# NOTE(review): the constructor arguments are passed positionally; presumably
# (model size, warmup steps) and (peak learning rate, warmup steps) — confirm
# against the schedule classes.
transformer_lr_schedule = TransformerLRSchedule(128, 4000)
warmup_lr_schedule = WarmupLRSchedule(0.0002, 4000)

# Evaluate both schedules on the same step axis.
steps = tf.range(40000, dtype=tf.float32)
plt.plot(transformer_lr_schedule(steps), label="TransformerLRSchedule")
plt.plot(warmup_lr_schedule(steps), label="WarmupLRSchedule")
plt.ylabel('Learning Rate')
plt.xlabel('Train Step')
# Without this call the `label=` values above are never drawn and the two
# curves cannot be told apart.
plt.legend();