In [1]:
import os
from datetime import datetime

working_path = os.getcwd()
print(working_path)

if working_path == '/content':
    from google.colab import drive
    drive.mount('/content/drive')

    %cd /content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob

    path = "/content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob/"

    # check python file folder
    assert os.path.exists(path + "tensorflow"), f"Upload python files in {path}tensorflow"
    %cd ./tensorflow

    # check data folder
    assert os.path.exists(path + "data"), f"Upload data files in {path}data"
else:
    path = "../"

    # check python file folder
    assert os.path.exists(path + "tensorflow"), f"Upload python files in {path}python"

    # check data folder
    assert os.path.exists(path + "data"), f"Upload data files in {path}data"

/content
Mounted at /content/drive
/content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob
/content/drive/Othercomputers/Il mio MacBook Pro/MagicKnob/tensorflow


In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras

tfk = tf.keras
tfkl = tf.keras.layers

from model_utils import save_model

In [3]:
# used for the writing of example outputs
run_name="audio_ht1"
# dataset : need an input and output folder in this folder
audio_folder = f"../data/{run_name}"
#audio_folder = "../../data/audio_ht1"
assert os.path.exists(audio_folder), f"Audio folder  not found. Looked for {audio_folder}"
# used to render example output during training
test_file = "../data/guitar.wav"
assert os.path.exists(test_file), "Test file not found. Looked for " + test_file

In [4]:
# initialize net specs
lstm_hidden_size = 64
learning_rate = 5e-3
batch_size = 20 # già default

In [5]:
import myk_data
import myk_loss

input_audio_folder = f'{audio_folder}/input'
output_audio_folder = f'{audio_folder}/output'

In [6]:
[train_dataset, val_dataset] = myk_data.generate_dataset(input_audio_folder, output_audio_folder)

loading input and output of ht1
    loading output of ht1
    generate_dataset:: Loaded frames from audio file 2048
    found input fragments of shape (1291, 2048, 1)
    found output fragments of shape (1291, 2048, 1)
    total input shape: (1291, 2048, 1)
    total output shape: (1291, 2048, 1)



In [7]:
train_dataset

<_TensorSliceDataset element_spec=(TensorSpec(shape=(2048, 1), dtype=tf.float32, name=None), TensorSpec(shape=(2048, 1), dtype=tf.float32, name=None))>

In [8]:
train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

In [9]:
train_dataset

<_BatchDataset element_spec=(TensorSpec(shape=(None, 2048, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2048, 1), dtype=tf.float32, name=None))>

In [16]:
def build_LSTM(input_shape, hidden_size):
    # Build the neural network layer by layer
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    dense = tfkl.Dense(8, activation='tanh')(input_layer)

    conv1d = tfkl.Conv1D(4, 3, 2, activation='tanh')(dense)

    gru = tfkl.GRU(8, return_sequences=True)(conv1d)

    output_layer = tfkl.Dense(1, activation=None)(gru)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model
    model.compile(loss=myk_loss.LossWrapper(), optimizer=tfk.optimizers.Adam(learning_rate=learning_rate), metrics=['accuracy'])

    # Return the model
    return model

#model = build_LSTM((None, 1), lstm_hidden_size)

model = keras.Sequential()
model.add(keras.layers.InputLayer(input_shape=(None, 1)))
model.add(keras.layers.Dense(8, activation='tanh', kernel_initializer='random_normal', bias_initializer='random_normal'))
model.add(keras.layers.Conv1D(4, 3, dilation_rate=2, activation='tanh', padding='causal', kernel_initializer='glorot_uniform', bias_initializer='random_normal'))
model.add(keras.layers.GRU (8, activation="tanh", return_sequences=True, recurrent_activation="sigmoid", use_bias=True, kernel_initializer="glorot_uniform", recurrent_initializer="orthogonal", bias_initializer="random_normal",))
model.add(keras.layers.Dense(1, kernel_initializer='orthogonal', bias_initializer='random_normal'))

model.compile(loss=myk_loss.LossWrapper(), optimizer=tfk.optimizers.Adam(learning_rate=learning_rate), metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, None, 8)           16        
                                                                 
 conv1d_3 (Conv1D)           (None, None, 4)           100       
                                                                 
 gru_3 (GRU)                 (None, None, 8)           336       
                                                                 
 dense_7 (Dense)             (None, None, 1)           9         
                                                                 
Total params: 461
Trainable params: 461
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Train the model
history = model.fit(
    x = train_dataset,
    epochs = 500,
    validation_data = val_dataset,
    callbacks = [
        tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=3, restore_best_weights=True),
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=3, factor=0.5, min_lr=1e-5)
    ]
).history

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500


In [19]:
guitar = myk_data.load_wav_file(test_file, 44100)

In [24]:
import soundfile
from datetime import datetime

now = datetime.now()
dt_string = now.strftime(f"models/%d-%m-%Y_%H-%M-%S")

os.mkdir(os.path.join(".", dt_string))

# save models
with open(f'{dt_string}/model_structure.txt', "w") as model_structure:
    model_structure.write(str(model.summary()))            # model summary to recreate

save_model(model, f'./{dt_string}/model_RTNeur.json') # RTNeur
model.save(f'./{dt_string}/model.h5')                 # tensorflow

guitar_dist = model.predict(guitar)
test = guitar_dist.flatten()
test = myk_data.normalize(test)
soundfile.write(f"{dt_string}/guitar_dist.wav", test, 44100)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, None, 8)           16        
                                                                 
 conv1d_3 (Conv1D)           (None, None, 4)           100       
                                                                 
 gru_3 (GRU)                 (None, None, 8)           336       
                                                                 
 dense_7 (Dense)             (None, None, 1)           9         
                                                                 
Total params: 461
Trainable params: 461
Non-trainable params: 0
_________________________________________________________________
