In [1]:
!git clone https://github.com/TheSoundOfAIOSR/rg_sound_generation.git
%cd rg_sound_generation

fatal: destination path 'rg_sound_generation' already exists and is not an empty directory.
/content/rg_sound_generation


In [2]:
!pip install -q -r requirements_colab.txt

In [3]:
# Attach Google Drive at /content/drive; the dataset and output paths used
# below live underneath this mount point.
from google.colab import drive

drive.mount("/content/drive", force_remount=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import soundfile as sf
import tsms
from tqdm import tqdm
from tcae.model import MtVae
from tcae.localconfig import LocalConfig
from tcae.dataset import get_dataset
from tcae.train import get_zero_batch, validation_step


# Configuration object shared by the model, the dataset loader and the
# synthesis code in the cells below.
conf = LocalConfig()

# Folder on the mounted Drive that receives the exported .wav files.
target_dir = "/content/drive/MyDrive/the_sound_of_ai/new_data/sounds"
# Create it up front: the export loops write into it with sf.write, which
# does not create missing parent directories.
os.makedirs(target_dir, exist_ok=True)

In [5]:
# Start from the settings that were saved next to the deployed model ...
conf.load_config_from_file("deployed/conf.json")

# ... then override what is specific to this evaluation run.
# One example per batch: the export loops below read index 0 of each batch
# (name and audio) and write exactly one file per iteration.
conf.batch_size = 1
conf.dataset_dir = "/content/drive/MyDrive/the_sound_of_ai/new_data"

In [6]:
# Instantiate the network and call it once on the batch from get_zero_batch
# before loading the saved weights (presumably so the model's variables are
# created first -- confirm against tcae.train).
model = MtVae(conf)
zero_batch = get_zero_batch(conf)
model(zero_batch)
model.load_weights("deployed/model.h5")

# get_dataset returns three datasets; only the last one is used in this notebook.
_unused_first, _unused_second, test_dataset = get_dataset(conf)

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


In [7]:
# Keys looked up in the dict returned by validation_step for every test example.
loss_keys = [
    "loss",
    "f0_loss",
    "h_freq_shifts_loss",
    "mag_env_loss",
    "h_mag_dist_loss",
    "h_mag_loss",
    "no_mask_h_mag_loss",
    "h_phase_diff_loss",
]

# Column-oriented accumulator: one list per loss plus the example name.
# Each list grows by one entry per evaluated example.
results = {key: [] for key in loss_keys}
results["name"] = []


# Evaluate every test example: record its losses, then synthesise the model's
# prediction and write it to "<name>.wav" inside target_dir.
for batch in tqdm(iter(test_dataset)):
    losses = validation_step(model, batch)
    # conf.batch_size is 1, so index 0 is the only example in the batch.
    name = batch["name"][0]
    name = name.numpy()[0].decode()

    for k in loss_keys:
        results[k].append(losses[k].numpy())
    results["name"].append(name)

    # Recover the note index with argmax, offset it by the configured starting
    # pitch and add two trailing axes for denormalize
    # (presumably so it broadcasts over frames and harmonics -- TODO confirm).
    note_number = batch["note_number"]
    note_number = tf.argmax(note_number, axis=-1) + conf.starting_midi_pitch
    note_number = note_number[:, tf.newaxis, tf.newaxis]

    # NOTE(review): Keras recommends calling the model directly
    # (model(batch, training=False)) instead of predict() for single small
    # batches inside a loop; left unchanged here because prediction_transform
    # may depend on the output types predict() returns -- verify before switching.
    preds = model.predict(batch)
    transformed = conf.data_handler.prediction_transform(preds)

    h_freq, h_mag, h_phase = conf.data_handler.denormalize(transformed, batch["mask"], note_number)
    audio = tsms.core.harmonic_synthesis(h_freq, h_mag, h_phase, conf.sample_rate, conf.frame_size)
    audio = np.array(audio[0])

    # Peak-normalise to [-1, 1]. A completely silent signal has peak 0; dividing
    # by it would fill the file with NaNs, so such audio is written unchanged.
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak

    target_path = os.path.join(target_dir, f"{name}.wav")
    sf.write(target_path, audio, samplerate=conf.sample_rate)

1588it [1:15:24,  2.85s/it]


In [8]:
# Turn the accumulated per-example losses and names into a table
# (one column per key of `results`) and preview the first five rows.
df = pd.DataFrame.from_dict(results)
df.head(5)

Unnamed: 0,loss,f0_loss,h_freq_shifts_loss,mag_env_loss,h_mag_dist_loss,h_mag_loss,no_mask_h_mag_loss,h_phase_diff_loss,name
0,0.009196,0.000508,0.000211,0.000327,0.002806,0.005344,0.041198,0.0,guitar_electronic_046-072-127
1,0.01372,0.000239,0.003843,0.000958,0.00471,0.00397,0.020782,0.0,guitar_electronic_021-088-100
2,0.00663,9e-06,3.1e-05,0.00024,0.000631,0.005719,0.055289,0.0,guitar_electronic_013-058-127
3,0.004384,0.000133,0.001927,0.000409,0.000581,0.001334,0.020037,0.0,guitar_acoustic_003-056-075
4,0.006099,0.000501,0.00153,0.000525,0.000632,0.002911,0.028897,0.0,guitar_acoustic_003-087-075


In [10]:
# Write the results table directly into the checkpoints directory instead of
# writing to the working directory and moving the file with a shell command:
# this avoids unquoted shell interpolation of the path and the case where a
# missing directory makes `mv` rename the file rather than move it.
os.makedirs(conf.checkpoints_dir, exist_ok=True)
df.to_csv(os.path.join(conf.checkpoints_dir, "test_results.csv"))

Export audio synthesised from the ground truth

In [12]:
# Synthesise each test example directly from the dataset batch (no model
# involved) and write it to "<name>_true.wav" inside target_dir.
for batch in tqdm(iter(test_dataset)):
    # conf.batch_size is 1, so index 0 is the only example in the batch.
    name = batch["name"][0]
    name = name.numpy()[0].decode()

    # Recover the note index with argmax, offset it by the configured starting
    # pitch and add two trailing axes for denormalize
    # (presumably so it broadcasts over frames and harmonics -- TODO confirm).
    note_number = batch["note_number"]
    note_number = tf.argmax(note_number, axis=-1) + conf.starting_midi_pitch
    note_number = note_number[:, tf.newaxis, tf.newaxis]

    h_freq, h_mag, h_phase = conf.data_handler.denormalize(batch, batch["mask"], note_number)
    audio = tsms.core.harmonic_synthesis(h_freq, h_mag, h_phase, conf.sample_rate, conf.frame_size)
    audio = np.array(audio[0])

    # Peak-normalise to [-1, 1]. A completely silent signal has peak 0; dividing
    # by it would fill the file with NaNs, so such audio is written unchanged.
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak

    target_path = os.path.join(target_dir, f"{name}_true.wav")
    sf.write(target_path, audio, samplerate=conf.sample_rate)

1588it [03:18,  8.01it/s]
