In [1]:
import os
from datetime import datetime

working_path = os.getcwd()
print(working_path)

if working_path == '/content':
    from google.colab import drive
    drive.mount('/content/drive')

    %cd /content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob

    path = "/content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob/"

    # check python file folder
    assert os.path.exists(path + "python"), f"Upload python files in {path}python"
    %cd ./python

    # check data folder
    assert os.path.exists(path + "data"), f"Upload data files in {path}data"
else:
    path = "../"

    # check python file folder
    assert os.path.exists(path + "python"), f"Upload python files in {path}python"

    # check data folder
    assert os.path.exists(path + "data"), f"Upload data files in {path}data"

/content
Mounted at /content/drive
/content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob
/content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob/python


In [2]:
import myk_data_par
import myk_models
import myk_loss
import myk_train
import myk_evaluate_par

import torch
from torch.utils.data import DataLoader

In [3]:
from torch.utils.tensorboard import SummaryWriter

In [4]:
# used for the writing of example outputs
run_name="audio_logic_dist_par"
# dataset : need an input and output folder in this folder
audio_folder = f"../data/{run_name}"
#audio_folder = "../../data/audio_ht1"
assert os.path.exists(audio_folder), f"Audio folder  not found. Looked for {audio_folder}"
# used to render example output during training
test_file = "../data/guitar.wav"
assert os.path.exists(test_file), "Test file not found. Looked for " + test_file

In [5]:
# initialize net specs
lstm_hidden_size = 16
learning_rate = 5e-3
last_learning_rate = learning_rate
batch_size = 50
max_epochs = 10000

# create the logger for tensorboard
writer = SummaryWriter()

In [6]:
print("Loading dataset from folder ", audio_folder)

input_folder = "/input/"
output_folder = "/output_randomMany/"

dataset = myk_data_par.generate_dataset(audio_folder + input_folder, audio_folder + output_folder, frag_len_seconds=0.5)

print("Splitting dataset")
train_ds, val_ds, test_ds = myk_data_par.get_train_valid_test_datasets(dataset)

Loading dataset from folder  ../data/audio_logic_dist_par
loading input and output of logicdist
    loading output of logicdist with parameter 0.02
    generate_dataset:: Loaded frames from audio file 22050
    found input fragments of shape (120, 22050, 2)
    found output fragments of shape (120, 22050, 1)
    total input shape: (120, 22050, 2)
    total output shape: (120, 22050, 1)

    loading output of logicdist with parameter 0.04
    generate_dataset:: Loaded frames from audio file 22050
    found input fragments of shape (120, 22050, 2)
    found output fragments of shape (120, 22050, 1)
    total input shape: (240, 22050, 2)
    total output shape: (240, 22050, 1)

    loading output of logicdist with parameter 0.06
    generate_dataset:: Loaded frames from audio file 22050
    found input fragments of shape (120, 22050, 2)
    found output fragments of shape (120, 22050, 1)
    total input shape: (360, 22050, 2)
    total output shape: (360, 22050, 1)

    loading output of 

In [7]:
dataset[0]

(tensor([[-6.1035e-05,  2.0000e-02],
         [-1.8311e-04,  2.0000e-02],
         [-1.8311e-04,  2.0000e-02],
         ...,
         [ 0.0000e+00,  2.0000e-02],
         [ 0.0000e+00,  2.0000e-02],
         [ 0.0000e+00,  2.0000e-02]]),
 tensor([[-6.1035e-05],
         [-1.5259e-04],
         [-1.8311e-04],
         ...,
         [-9.1553e-05],
         [-9.1553e-05],
         [-6.1035e-05]]))

In [8]:
# test GPU, must be done after splitting
device = myk_train.get_device()

cuda device available


In [9]:
# create data loaders
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=device))
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=device))
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True, generator=torch.Generator(device=device))

In [10]:
model = myk_models.SimpleLSTM(hidden_size=lstm_hidden_size, param=True).to(device)
total_params = sum(p.numel() for p in model.parameters())
print(model), total_params

SimpleLSTM(
  (lstm): LSTM(2, 16, batch_first=True)
  (dense): Linear(in_features=16, out_features=1, bias=True)
)


(None, 1297)

In [11]:
# crate optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5, verbose=True)

loss_functions = myk_loss.LossWrapper()

# https://github.com/Alec-Wright/Automated-GuitarAmpModelling/blob/main/dist_model_recnet.py
# https://github.com/Alec-Wright/CoreAudioML/blob/bad9469f94a2fa63a50d70ff75f5eff2208ba03f/training.py

In [12]:
# %load_ext tensorboard
# %tensorboard --logdir runs

In [13]:
# training loop
lowest_val_loss = 0
best_loss = False

max_patience = 500
curr_patience = max_patience

# datetime object containing current date and time
best_epoch = 0
now = datetime.now()
dt_string = now.strftime(f"models/{model.model_type}_%d-%m-%Y_%H-%M-%S")

os.mkdir(os.path.join(".", dt_string))

with open(f'{dt_string}/model_structure.txt', "w") as model_structure:
    model_structure.write(str(model))
    model_structure.write(f"total params: {total_params}")

results_folder = audio_folder + "/results"
if not os.path.exists(results_folder):
    os.mkdir(results_folder)
results_folder += output_folder
if not os.path.exists(results_folder):
    os.mkdir(results_folder)

for epoch in range(max_epochs):
    #ep_loss = myk_train.train_epoch_interval(model, train_dl, loss_functions, optimizer, device=device)
    ep_loss = myk_train.train_epoch_interval(model, train_dl, loss_functions, optimizer, device=device)

    #ep_loss = myk_train.train_epoch(model, train_dl, loss_functions, optimizer, device=device)
    val_loss = myk_train.compute_batch_loss(model, val_dl, loss_functions, device=device)
    scheduler.step(val_loss);

    writer.add_scalar("Loss/val", val_loss, epoch)
    writer.add_scalar("Loss/train", ep_loss, epoch)
    writer.add_scalar("LR", optimizer.param_groups[0]['lr'], epoch)

    # check if we have beaten our best loss to date
    if lowest_val_loss == 0:# first run
        lowest_val_loss = val_loss
    elif val_loss < lowest_val_loss:# new record
        lowest_val_loss = val_loss
        best_loss = True
    else: # no improvement
        best_loss = False
        curr_patience -= 1

    if best_loss: # save best model so far
        best_epoch = epoch
        print(f"    Record loss - saving at epoch {epoch}")
        # save for RTNeural
        model.save_for_rtneural(f"{dt_string}/model.json")
        # save for pythorch
        torch.save(model.state_dict(), f"{dt_string}/model.ph")
        print(f"    epoch {epoch}, train_loss {ep_loss}, val_loss {val_loss} ")
        curr_patience = max_patience
    if epoch % 50 == 0: # save an example processed audio file
        myk_evaluate_par.run_file_through_model(model, test_file, results_folder + run_name + str(epoch)+".wav")
        print(f"epoch {epoch}, train_loss {ep_loss}, val_loss {val_loss} ")
    if curr_patience == 0:
        print("max patience reached, stopping training")
        # load best parameters in the model
        model.load_state_dict(torch.load(f"{dt_string}/model.ph"))
        model.eval() # set inference state in the possible layers that need it
        myk_evaluate_par.run_file_through_model(model, test_file, results_folder + run_name + str(best_epoch)+"_BEST.wav", final_eval=True)
        break

epoch 0, train_loss 0.18877896666526794, val_loss 0.18649597465991974 
    Record loss - saving at epoch 1
    epoch 1, train_loss 0.16599775850772858, val_loss 0.16013415157794952 
    Record loss - saving at epoch 7
    epoch 7, train_loss 0.1624847799539566, val_loss 0.15866796672344208 
    Record loss - saving at epoch 12
    epoch 12, train_loss 0.1629391610622406, val_loss 0.1553790420293808 
    Record loss - saving at epoch 14
    epoch 14, train_loss 0.16197621822357178, val_loss 0.15458525717258453 
Epoch 00021: reducing learning rate of group 0 to 2.5000e-03.
    Record loss - saving at epoch 25
    epoch 25, train_loss 0.15977855026721954, val_loss 0.15359361469745636 
Epoch 00032: reducing learning rate of group 0 to 1.2500e-03.
Epoch 00038: reducing learning rate of group 0 to 6.2500e-04.
    Record loss - saving at epoch 39
    epoch 39, train_loss 0.1602579802274704, val_loss 0.15323595702648163 
Epoch 00046: reducing learning rate of group 0 to 3.1250e-04.
epoch 50, t

In [18]:
to_append = [f'\n{run_name}/**/*.wav', f'\n!{run_name}/**/*_BEST*.wav', f'\n!{run_name}/output*/*.wav']

with open(f'{audio_folder}/../.gitignore', "r+") as gitignore:
    read = gitignore.read()
    if to_append[0][2:len(to_append[0])] not in read:
        gitignore.writelines(to_append)
    if to_append_output[2:len(to_append_output)] not in read:
        gitignore.writelines(to_append_output)