# Notebook for experimenting with different models

Set up the model parameters

In [None]:
import CoreAudioML.miscfuncs as miscfuncs
import CoreAudioML.training as training
import gc
import os
import time
import torch
import torch.optim as optim
from CoreAudioML.dataset import DataSet
from CoreAudioML.networks import load_model, SimpleRNN
from colab_functions import smoothed_spectrogram, gen_smoothed_spectrogram_plot, pyplot_to_tensor
from scipy.io.wavfile import write
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm


In [None]:
# Wall-clock reference; the training cell uses it to compute elapsed time.
start_time = time.time()

# Chunk size handed to model.process_data, set based on GPU memory (max ~60000).
GPU_CHUNK = 50000
data_dir = "Data/valtteri"
hidden_size = 96

# Network hyper-parameters, collected in one mapping and unpacked into SimpleRNN.
model_config = {
    'input_size': 1,
    'unit_type': 'LSTM',
    'hidden_size': hidden_size,
    'output_size': 1,
    'skip': 0,
    'num_layers': 1,
}
model = SimpleRNN(**model_config)

# The torch defaults are switched only after the model has been constructed;
# the network itself is then moved to the GPU explicitly.
dtype, device = torch.float32, 'cuda'
torch.set_default_dtype(dtype)
torch.set_default_device(device)
torch.cuda.set_device(0)
model = model.cuda()

Set up the optimizer, scheduler and loss function

In [None]:
# Adam optimiser over all network parameters, with weight decay.
optimizer = optim.Adam(model.parameters(), lr=5e-3, weight_decay=1e-4)

# TODO: test these alternative settings
#   Adam(lr=0.01) with ReduceLROnPlateau(factor=0.33, patience=200)
#   Adam(lr=5e-4)
#   LossWrapper({'ESRPre': 0.75, 'DC': 0.25}, 'awlp', 48000)

# Halve the learning rate once the monitored value (the validation loss passed
# to scheduler.step) has stopped decreasing for more than `patience` steps.
# The `verbose` keyword is deprecated in recent PyTorch releases and its
# default already disables printing, so it is intentionally not passed.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Loss made of a single 'ESRPre' term with weight 1.0.
# NOTE(review): 'awlp' and 48000 are presumably the pre-emphasis filter type
# and the sample rate in Hz - confirm against CoreAudioML.training.LossWrapper.
loss_functions = training.LossWrapper({'ESRPre': 1.0}, 'awlp', 48000)

# Training statistics container and TensorBoard writer for this run.
train_track = training.TrainTrack()
writer = SummaryWriter(os.path.join('TensorboardData', f'LSTM-{hidden_size}'))

Load up the dataset

In [None]:
dataset = DataSet(data_dir=data_dir)

# Training subset, cut into fixed-length frames (0.5 s segments).
train_frame_len = 24000
dataset.create_subset('train', frame_len=train_frame_len)
# load_file will look for files that end in -input and -target
dataset.load_file('train', 'train')

# Validation subset, created without a frame length and filled from the
# 'validate' files.
dataset.create_subset('val')
dataset.load_file('validate', 'val')



Training loop

In [None]:
# When training is resumed, the time already recorded in train_track is added
# to the elapsed time (total_time is multiplied by 3600, so it is presumably
# stored in hours).
init_time = time.time() - start_time + train_track['total_time']*3600

# With save_state enabled, save_model also writes the network weights.
model.save_state = True

save_path = "Results/valtteri"
epochs = 2000
patience_counter = 0
validation_patience_limit_epoch = 0

# Settings of a single training epoch.
batch_size = 50         # batch size
init_len = 200          # initial length, number of samples before backpropagation
update_freq = 1000      # number of samples that are run before updating weights
validation_interval = 2 # validate on every second epoch
patience_limit = 25     # stop once this many validations in a row fail to improve

# Main training loop. It starts one past the epoch stored in train_track, so
# a restarted run continues at the correct epoch number.
first_epoch = train_track['current_epoch'] + 1
for epoch in tqdm(range(first_epoch, epochs + 1)):
    ep_st_time = time.time()

    # One epoch of training
    epoch_loss = model.train_epoch(
        dataset.subsets['train'].data['input'][0],
        dataset.subsets['train'].data['target'][0],
        loss_functions,
        optimizer,
        batch_size,
        init_len,
        update_freq
    )
    writer.add_scalar('Time/EpochTrainingTime', time.time()-ep_st_time, epoch)

    # Validation pass
    if epoch % validation_interval == 0:
        val_ep_st_time = time.time()
        val_output, val_loss = model.process_data(
            dataset.subsets['val'].data['input'][0],
            dataset.subsets['val'].data['target'][0],
            loss_functions,
            GPU_CHUNK,
        )
        scheduler.step(val_loss)

        if val_loss < train_track['best_val_loss']:
            # New best validation loss: reset the patience counter and keep
            # both the model and its validation output.
            patience_counter = 0
            model.save_model('model_best', save_path)
            best_val_wav = os.path.join(save_path, "best_val_out.wav")
            write(best_val_wav, dataset.subsets['val'].fs, val_output.cpu().numpy()[:, 0, 0])
        else:
            patience_counter += 1

        train_track.val_epoch_update(val_loss.item(), val_ep_st_time, time.time())
        latest_val_loss = train_track['validation_losses'][-1]
        writer.add_scalar('TrainingAndValidation/ValidationLoss', latest_val_loss, epoch)

    # Record the epoch in the training statistics, then mirror the loss and
    # the current learning rate to TensorBoard (just for recording purposes).
    train_track.train_epoch_update(epoch_loss.item(), ep_st_time, time.time(), init_time, epoch)
    latest_train_loss = train_track['training_losses'][-1]
    current_lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('TrainingAndValidation/TrainingLoss', latest_train_loss, epoch)
    writer.add_scalar('TrainingAndValidation/LearningRate', current_lr, epoch)

    # Checkpoint the model and the statistics after every epoch.
    model.save_model('model', save_path)
    miscfuncs.json_save(train_track, 'training_stats', save_path)

    # Early stopping on validation patience.
    if patience_counter > patience_limit:
        validation_patience_limit_epoch = epoch
        break

if validation_patience_limit_epoch:
    print(f'validation patience limit reached at epoch {validation_patience_limit_epoch}')

print("done training")

Clean up CUDA memory and datasets

In [None]:
# Record the peak CUDA memory allocation of the run, both in the training
# statistics and in a small text file next to the saved models.
cuda_max_memory_allocated = torch.cuda.max_memory_allocated()
train_track['maxmemusage'] = cuda_max_memory_allocated
with open(os.path.join(save_path, 'maxmemusage.txt'), 'w') as f:
    f.write(str(cuda_max_memory_allocated))

# Remove dataset from memory
del dataset
# Run the garbage collector first so that objects it frees have handed their
# memory back to the allocator before the cache is emptied;
# torch.cuda.empty_cache() only releases cached blocks that are unoccupied.
gc.collect()
# Empty the CUDA cache
torch.cuda.empty_cache()

Create a new dataset object for the test set

In [None]:
# Fresh DataSet that holds only the 'test' subset.
dataset = DataSet(data_dir=data_dir)
dataset.create_subset('test')
dataset.load_file('test', 'test')

# Loss used for the test metrics. DC, LogCosh, STFT and multi-resolution STFT
# losses were also tried here but are currently disabled.
lossESR = training.ESRLoss()

# Reference spectrogram of the test target; y1 is reused by the plots of the
# final and the best model. The [:, 0, 0] index reduces the target to 1-D.
test_target_audio = dataset.subsets['test'].data['target'][0].cpu().numpy()[:, 0, 0]
test_sample_rate = dataset.subsets['test'].fs
f, y1, min_, max_ = smoothed_spectrogram(test_target_audio, fs=test_sample_rate, size=4096)

Test the final model

In [None]:
print("testing the final model")

# Evaluate the model the training ended with on the test subset.
test_subset = dataset.subsets['test']
test_input = test_subset.data['input'][0]
test_target = test_subset.data['target'][0]
test_output, test_loss = model.process_data(test_input, test_target, loss_functions, GPU_CHUNK)

# 1-D numpy copy of the prediction, shared by the spectrogram, the wav file
# and the reference batch below.
final_audio = test_output.cpu().numpy()[:, 0, 0]
f, y2, min_, max_ = smoothed_spectrogram(final_audio, fs=test_subset.fs, size=4096)

# ESR on the test set. The DC / LogCosh / STFT / multi-resolution STFT
# metrics are currently disabled.
with torch.no_grad():
    test_loss_ESR = lossESR(test_output, test_target)

write(os.path.join(save_path, "test_out_final.wav"), test_subset.fs, final_audio)

# Scalars and the target-vs-prediction spectrogram for TensorBoard.
writer.add_scalar('Testing/FinalTestLoss', test_loss.item())
writer.add_scalar('Testing/FinalTestESR', test_loss_ESR.item())
final_plot = gen_smoothed_spectrogram_plot(
    f,
    target=y1,
    predicted=y2,
    title='Testing/FinalPeakSpectrogram'
)
writer.add_image('Testing/FinalPeakSpectrogram', pyplot_to_tensor(final_plot))

# Store the same metrics in the training statistics.
train_track['test_loss_final'] = test_loss.item()
train_track['test_lossESR_final'] = test_loss_ESR.item()

# Add input/output reference batch (first 2048 samples) to training stats.
# For input batch in case of conditioned models, we assume all params equal to 0.0
train_track['input_batch'] = test_input.cpu().data.numpy()[:2048, 0, 0].tolist()
train_track['output_batch_final'] = final_audio[:2048].tolist()

Test the best model

In [None]:
print("testing the best model")

# Drop the final model and run the garbage collector before the best
# checkpoint is loaded in its place.
del model
gc.collect()
best_val_net = miscfuncs.json_load('model_best', save_path)
model = load_model(best_val_net)

# Evaluate the best-validation model on the test subset.
test_subset = dataset.subsets['test']
test_target = test_subset.data['target'][0]
test_output, test_loss = model.process_data(
    test_subset.data['input'][0],
    test_target,
    loss_functions,
    GPU_CHUNK,
)

# 1-D numpy copy of the prediction, shared by the spectrogram, the wav file
# and the reference batch below.
best_audio = test_output.cpu().numpy()[:, 0, 0]
f, y2, min_, max_ = smoothed_spectrogram(best_audio, fs=test_subset.fs, size=4096)

# ESR on the test set. The DC / LogCosh / STFT / multi-resolution STFT
# metrics are currently disabled.
with torch.no_grad():
    test_loss_ESR = lossESR(test_output, test_target)

write(os.path.join(save_path, "test_out_best.wav"), test_subset.fs, best_audio)

# Scalars and the target-vs-prediction spectrogram for TensorBoard.
writer.add_scalar('Testing/BestTestLoss', test_loss.item())
writer.add_scalar('Testing/BestTestESR', test_loss_ESR.item())
best_plot = gen_smoothed_spectrogram_plot(
    f,
    target=y1,
    predicted=y2,
    title='Testing/BestPeakSpectrogram'
)
writer.add_image('Testing/BestPeakSpectrogram', pyplot_to_tensor(best_plot))

# Store the same metrics in the training statistics.
train_track['test_loss_best'] = test_loss.item()
train_track['test_lossESR_best'] = test_loss_ESR.item()

# Add output reference batch to training stats, input already entered previously
train_track['output_batch_best'] = best_audio[:2048].tolist()

Save train track

In [None]:
# Persist the final training statistics (including the test metrics added
# after training) next to the saved models.
miscfuncs.json_save(train_track, 'training_stats', save_path)

# Nothing is logged to TensorBoard after this point, so close the writer to
# make sure every buffered event is flushed to disk.
writer.close()

In [None]:
# Convert the best PyTorch checkpoint (JSON) into a Keras model carrying the
# same weights, then export it with model_utils.save_model.
import json
import numpy as np
import tensorflow as tf
from tensorflow import keras
from model_utils import save_model

model_path = 'Results/valtteri/model_best.json'
save_path = "Results/valtteri/"
output_model_path = "Results/valtteri/model_best_rtneural.json"

with open(model_path) as json_file:
    model_data = json.load(json_file)

# Read every field needed below in one place, so that a missing key is
# reported as a corrupted file (naming the file, not the network object)
# instead of surfacing later as a bare KeyError.
num = 0  # index of the recurrent layer whose weights are exported
try:
    model_type = model_data['model_data']['model']
    if model_type != "SimpleRNN":
        raise ValueError(
            "Error! This model type is still unsupported: %s" % model_type
        )
    input_size = model_data['model_data']['input_size']
    num_layers = model_data['model_data']['num_layers']
    unit_type = model_data['model_data']['unit_type']
    hidden_size = model_data['model_data']['hidden_size']
    skip = int(model_data['model_data']['skip'])  # How many input elements are skipped
    output_size = model_data['model_data']['output_size']
    bias_fl = bool(model_data['model_data']['bias_fl'])
    lin_weight = np.array(model_data['state_dict']['lin.weight'])
    lin_bias = np.array(model_data['state_dict']['lin.bias'])
    WVals = np.array(model_data['state_dict']['rec.weight_ih_l%d' % num])
    UVals = np.array(model_data['state_dict']['rec.weight_hh_l%d' % num])
    bias_ih_l0 = np.array(model_data['state_dict']['rec.bias_ih_l%d' % num])
    bias_hh_l0 = np.array(model_data['state_dict']['rec.bias_hh_l%d' % num])
except KeyError as err:
    # Raise instead of calling exit(): exiting would take the whole notebook
    # kernel down together with the error message.
    raise ValueError("Model file %s is corrupted" % model_path) from err

# Only a single LSTM layer is rebuilt below; reject anything else explicitly
# rather than exporting a model that does not match the checkpoint.
if unit_type != "LSTM" or num_layers != 1:
    raise ValueError(
        "Only single-layer LSTM models can be converted "
        "(got unit_type=%s, num_layers=%s)" % (unit_type, num_layers)
    )

with tf.device("/cpu:0"):
    input_layer = keras.layers.Input(shape=(None, input_size))
    # NOTE(review): both activations are set to None, which in Keras means no
    # activation is applied. The Keras model appears to be used only as a
    # container for the exported weights - confirm that the consumer of the
    # exported JSON does not rely on these activation settings.
    rnn = keras.layers.LSTM(
        hidden_size,
        activation=None,
        return_sequences=True,
        recurrent_activation=None,
        use_bias=bias_fl,
        unit_forget_bias=False,
    )(input_layer)
    # The initialisers are irrelevant: the weights are overwritten below.
    dense_layer = keras.layers.Dense(
        output_size,
        kernel_initializer="orthogonal",
        bias_initializer='random_normal'
    )(rnn)
    model = keras.models.Model(inputs=input_layer, outputs=dense_layer)

# The checkpoint weight matrices are transposed before being handed to Keras,
# and the two checkpoint bias vectors are summed into the single Keras bias.
BVals = bias_ih_l0 + bias_hh_l0
lstm_weights = [np.transpose(WVals), np.transpose(UVals), BVals]
model.layers[1].set_weights(lstm_weights)

# Dense kernel is (hidden_size, output_size); for output_size == 1 this is
# identical to the plain reshape(hidden_size, 1).
dense_weights = [lin_weight.reshape(output_size, hidden_size).T, lin_bias]
model.layers[2].set_weights(dense_weights)

save_model(
    model,
    output_model_path,
    keras.layers.InputLayer,
    skip=skip,
    input_batch=None,
    output_batch=None,
    metadata=None,
    verbose=False
)
