# Latent Space Explorer for Music Generator

## Setup

In [1]:
import tensorflow as tf
!pip install tensorflow-io -q
import tensorflow_io as tfio
import ipywidgets as ipw
import IPython
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile as wf

# Mount Google Drive (Colab only) so the saved model and its parameter file
# can be read from the paths below.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Load the generator from the chosen training run. 'wasserstein_loss' is mapped
# to None in custom_objects — presumably because the loss is not needed for
# inference; TODO confirm.
model_path = "drive/MyDrive/Master Thesis/notebooks/models/train/20230428-085910"
G = tf.keras.models.load_model(model_path + "/gen_8.h5", custom_objects={'wasserstein_loss':None})

# Copy the params.py stored next to the model into the working directory so it
# can be imported as a regular module.
param_file = model_path + "/params.py"
!cp "$param_file" .
import params

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.0/28.0 MB[0m [31m47.0 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive




### Functions

In [2]:
def mel_to_audio(S):
    """Reconstruct a waveform from a mel-scaled dB spectrogram via Griffin-Lim.

    The spectrogram is projected from mel bins onto linear STFT bins using a
    normalised transpose of the mel filterbank, converted from dB to amplitude
    (10^(dB / 20)) and passed to ``tfio.audio.inverse_spectrogram``. All signal
    parameters are read from the ``params`` module.

    Args:
        S: spectrogram whose last axis holds the mel bins (assumed to be
            ``params.MEL_BINS`` wide — TODO confirm against the generator output).

    Returns:
        The reconstructed audio as a NumPy array.
    """
    mel_spect = tf.cast(S, tf.float32)

    # mel to linear: build the forward filterbank, then normalise each
    # spectrogram bin by its total filter weight to get an approximate inverse.
    filterbank = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=params.MEL_BINS,
        num_spectrogram_bins=params.STFT_WINDOW // 2 + 1,
        sample_rate=params.SAMPLE_RATE,
        lower_edge_hertz=params.MEL_FREQ_MIN,
        upper_edge_hertz=params.MEL_FREQ_MAX,
        dtype=tf.dtypes.float32,
    ).numpy()
    bin_weight = np.sum(filterbank, axis=1)
    # Bins with zero total weight divide by zero; silence the warnings and let
    # nan_to_num replace the non-finite results.
    with np.errstate(divide="ignore", invalid="ignore"):
        mel_to_linear = np.nan_to_num(np.divide(filterbank.T, bin_weight))
    linear_db = tf.tensordot(mel_spect, tf.constant(mel_to_linear), 1)

    # dB to amplitude: 10^(dB / 20)
    base = tf.constant(10.0, dtype=linear_db.dtype)
    amplitude = tf.pow(base, linear_db / 20)

    # Griffin-Lim phase reconstruction
    waveform = tfio.audio.inverse_spectrogram(
        amplitude,
        params.STFT_WINDOW,
        params.STFT_WINDOW,
        params.STFT_HOP,
        iterations=params.GRIFFIN_LIM_ITER,
    )
    return waveform.numpy()

@tf.function
def oh_rhythm(r):
    """One-hot encode rhythm indices and drop axis 1 of the encoded tensor.

    The encoding depth is the module-level ``n_rhy``, which therefore has to
    be defined before this function is first called.
    """
    encoded = tf.one_hot(r, n_rhy)
    return tf.squeeze(encoded, axis=1)

class DeNormSpect(tf.keras.layers.Layer):
    """Keras layer that maps a [0, 1]-normalised spectrogram to a dB range.

    Args:
        min_dB: dB value that a normalised 0 maps to.
        max_dB: dB value that a normalised 1 maps to.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, min_dB=-40., max_dB=70., **kwargs):
        super().__init__(**kwargs)
        self.min, self.max = min_dB, max_dB
        # Rescaling computes inputs * scale + offset, i.e. x * (max - min) + min.
        self.rescale = tf.keras.layers.Rescaling(
            scale=self.max - self.min,
            offset=self.min,
        )

    def call(self, spect):
        """Clip ``spect`` to [0, 1] and rescale it to [min_dB, max_dB]."""
        clipped = tf.clip_by_value(spect, 0., 1.)
        return self.rescale(clipped)


# Shared instance with the default -40 dB .. 70 dB range.
denormalize_spect = DeNormSpect()

In [3]:
def show_spectrogram(S, output=None, title='', n_spect=1, scale=5):
    """Plot a dB spectrogram as an image, optionally inside an Output widget.

    The spectrogram is transposed before plotting, drawn with the origin at the
    bottom and with a fixed colour range of -40 to 70.

    Args:
        S: spectrogram to draw.
        output: widget used as a context manager to capture the figure; it is
            cleared first. When None the figure is shown in the current cell.
        title: figure title.
        n_spect: width multiplier for the figure (width is ``scale * n_spect``).
        scale: figure height, and base unit of the width.
    """
    fig_size = (scale * n_spect, scale)
    plt.figure(figsize=fig_size)
    plt.imshow(tf.transpose(S), origin='lower', vmin=-40, vmax=70)
    plt.axis('off')
    plt.title(title)

    if output is None:
        plt.show()
        return

    with output:
        output.clear_output()
        plt.show()

def show_audio(S, output=None):
    """Convert a spectrogram to audio and display an audio player for it.

    The spectrogram is squeezed, inverted with ``mel_to_audio`` and written to
    'rec.wav' in the working directory (overwritten on every call); the player
    is created from that file.

    Args:
        S: spectrogram to convert; singleton dimensions are removed first.
        output: widget used as a context manager to capture the player. When
            None (the default, matching ``show_spectrogram``) the player is
            displayed in the current cell.
    """
    S = tf.squeeze(S)
    rec_aud = mel_to_audio(S)
    wf.write('rec.wav', params.SAMPLE_RATE, rec_aud)
    rec = IPython.display.Audio('rec.wav')
    if output is None:
        display(rec)
        return
    with output:
        display(rec)

## Output

## Code

In [4]:
# Output areas: spectrogram plot, audio player and the slider grid.
output = ipw.Output()
aud_output = ipw.Output()
slider_output = ipw.Output()

# Action buttons.
rand_b = ipw.Button(description="Randomise State")
make_aud_b = ipw.Button(description="Make Audio")

## Display

In [5]:
# Lay out the main UI: spectrogram area, the two buttons, then the audio player.
display(output, rand_b, make_aud_b, aud_output)

Output()

Button(description='Randomise State', style=ButtonStyle())

Button(description='Make Audio', style=ButtonStyle())

Output()

## Code

In [6]:
# Generator inputs, all starting at zero: latent vector, arousal and valence.
current_state = np.zeros((1, params.GEN_NOISE_DIM))
current_aro = np.zeros((1, 1))
current_val = np.zeros((1, 1))

# Number of rhythm classes and a randomly chosen initial class index.
n_rhy = params.RL_HI_BOUND - params.RL_LOW_BOUND
current_rhy = np.random.randint(0, n_rhy, (1, 1))

# Most recently generated spectrogram in dB; read by make_audio.
out = denormalize_spect(G([current_state, [current_aro, current_val], oh_rhythm(current_rhy)])).numpy()

# Slider widgets are created in a later cell; until then they are None.
# slider_rhy is initialised here as well so that all three control sliders
# exist as module-level names from the start.
sliders = None
slider_aro = slider_val = slider_rhy = None
slider_box = None


def make_audio(b):
    """Button callback: render the current spectrogram ``out`` as audio.

    Clears ``aud_output`` first. If ``out`` is None a hint is printed there
    instead of a player.

    Args:
        b: the clicked button (unused).
    """
    aud_output.clear_output()
    if out is None:
        with aud_output:
            print("You have to generate something first!")
        return
    show_audio(out, output=aud_output)

def update_output():
    """Regenerate the spectrogram from the current inputs and redraw it.

    Runs the generator on the module-level latent/arousal/valence/rhythm state,
    stores the denormalised result in the global ``out``, plots it into
    ``output`` and prints the latent vector's norm as a percentage.
    """
    global out
    generator_inputs = [current_state, [current_aro, current_val], oh_rhythm(current_rhy)]
    out = G(generator_inputs)
    out = denormalize_spect(out).numpy()
    show_spectrogram(out, output=output)

    # 30 is presumably the norm with all 100 latent sliders at +/-3
    # (3 * sqrt(100)) — TODO confirm.
    scaled_state = (current_state[0] / 30) * 100
    norm_percent = np.sqrt(np.sum(np.square(scaled_state)))
    with output:
        print(f"Norm: {norm_percent:.1f}% of max")

def update_state(change):
    """Slider observer: write the new slider value into the matching input.

    The slider's ``description`` selects the target: "aro", "val" and "rhy"
    update the arousal, valence and rhythm arrays; any other description is
    parsed as the index of a latent dimension. The output is regenerated
    afterwards.

    Args:
        change: traitlets change dict; ``change['owner']`` is the slider and
            ``change['new']`` its new value.
    """
    name = change['owner'].description
    value = change['new']
    if name == "aro":
        current_aro[0, 0] = value
    elif name == "val":
        current_val[0, 0] = value
    elif name == "rhy":
        # The rhythm slider is continuous and its upper bound is n_rhy, but
        # valid one-hot indices are 0 .. n_rhy - 1; an out-of-range index
        # would be encoded as an all-zero vector, so clamp it.
        index = int(value)
        if index > n_rhy - 1:
            index = n_rhy - 1
        if index < 0:
            index = 0
        current_rhy[0, 0] = index
    else:
        current_state[0, int(name)] = value
    update_output()

def make_slider(n = 0, val=0.0, min=-3, max=3):
    """Create a vertical FloatSlider wired to ``update_state``.

    Args:
        n: label for the slider; ``update_state`` uses it to decide which
            input the slider controls.
        val: initial value. May be a Python number, a NumPy scalar or a
            size-1 array (callers pass e.g. (1, 1) arrays).
        min: lower slider bound.
        max: upper slider bound.

    Returns:
        The configured ``ipw.FloatSlider``.

    Raises:
        ValueError: if ``val`` holds more than one element.
    """
    # Convert explicitly: implicit float() conversion of arrays with ndim > 0
    # is deprecated in NumPy, so do not rely on the widget to do it.
    initial = float(np.asarray(val).item())
    s = ipw.FloatSlider(
        value=initial,
        min=min,
        description=f"{n}",
        max=max,
        step=0.01,
        disabled=False,
        continuous_update=False,
        orientation='vertical',
        readout=True,
        readout_format='.2f',
    )
    # Attach the observer after construction so that setting the initial
    # value does not trigger a regeneration.
    s.observe(update_state, names='value')
    return s

def make_slider_box(sliders, div=15):
    """Display all sliders in ``slider_output`` as a grid.

    The first row holds the module-level arousal, valence and rhythm sliders;
    the latent sliders follow in rows of ``div``.

    Args:
        sliders: list of latent-dimension sliders.
        div: number of latent sliders per row.
    """
    rows = [ipw.HBox([slider_aro, slider_val, slider_rhy])]
    # Step through the list in chunks of `div` so the number of rows always
    # matches the row width and no empty rows are appended.
    for start in range(0, len(sliders), div):
        rows.append(ipw.HBox(sliders[start:start + div]))

    slider_output.clear_output()
    with slider_output:
        display(ipw.VBox(rows))

def randomise(b):
    """Button callback: draw random generator inputs and refresh the UI.

    Samples a new latent vector (standard normal), arousal and valence
    (uniform in [-1, 1)) and a rhythm class. If the sliders already exist they
    are rebuilt with the new values and re-displayed; finally the output is
    regenerated.

    Args:
        b: the clicked button (unused).
    """
    global current_state, current_aro, current_val, current_rhy
    # slider_rhy must be global too; otherwise the rebuilt rhythm slider is a
    # discarded local and make_slider_box keeps showing the stale one.
    global slider_aro, slider_val, slider_rhy

    current_state = np.array(tf.random.normal((1, params.GEN_NOISE_DIM)))
    current_aro = np.array(tf.random.uniform((1, 1), minval=-1, maxval=1))
    current_val = np.array(tf.random.uniform((1, 1), minval=-1, maxval=1))
    current_rhy = np.random.randint(0, n_rhy, (1, 1))

    if sliders is not None:
        # Pass plain scalars rather than (1, 1) arrays as initial values.
        for i in range(len(sliders)):
            sliders[i] = make_slider(i, val=float(current_state[0, i]))
        slider_aro = make_slider('aro', val=float(current_aro[0, 0]), min=-1, max=1)
        slider_val = make_slider('val', val=float(current_val[0, 0]), min=-1, max=1)
        slider_rhy = make_slider('rhy', val=int(current_rhy[0, 0]), min=0, max=n_rhy)
        make_slider_box(sliders)
    update_output()

# Wire the buttons to their handlers.
rand_b.on_click(randomise)
make_aud_b.on_click(make_audio)

# Render the spectrogram for the initial state into `output`.
update_output()

## Display

In [7]:
# Placeholder for the slider grid; it is filled by make_slider_box.
display(slider_output)

Output()

## Code

In [8]:
# Control sliders, initialised from the current generator inputs so the UI
# matches the state even if it was randomised before this cell ran.
slider_aro = make_slider('aro', val=float(current_aro[0, 0]), min=-1, max=1)
slider_val = make_slider('val', val=float(current_val[0, 0]), min=-1, max=1)
slider_rhy = make_slider('rhy', val=int(current_rhy[0, 0]), min=0, max=n_rhy)

# One slider per latent dimension: follow the width of current_state instead
# of a hard-coded count, because update_state indexes current_state by the
# slider's number.
sliders = [
    make_slider(i, val=float(current_state[0, i]))
    for i in range(current_state.shape[1])
]

make_slider_box(sliders)