# Automatic Tuning of SAMPLE hyperparameters
In this notebook we will see how to automatically tune the hyperparameters of SAMPLE.

## Setup

### Libraries
Install the `sample` package and its dependencies.
The extras will install the dependencies for helper functions, such as plots.

In [None]:
import sys
# Uncomment the next line to install the package with pip from the parent
# directory (`..`), using the interpreter that runs this kernel
# !$sys.executable -m pip install -qU ..
from sample import __version__
from sample.vid import logo
# Show which version of the package is in use, so that results can be
# traced back to it
print("SAMPLE version:", __version__)
# Draw the package logo (presumably `size_inches` is the figure size in
# inches; confirm in `sample.vid.logo`)
logo(size_inches=6)

### Load audio
Download the test audio or load your own audio file. In this notebook, you can specify

   - a filename: to load the audio from file
   - a URL: to download the audio file from the web (only if fname is empty)
   - start time and length (in seconds): to cut the audio file

In [None]:
from matplotlib import pyplot as plt
from librosa.display import waveshow
from IPython import display as ipd
import numpy as np
import functools
import librosa
import requests
import os

@functools.wraps(ipd.Audio)
def play(*args, label=None, **kwargs):
  # Show an inline audio player in the cell output.
  #
  # Documentation is kept in comments on purpose: `functools.wraps` copies
  # the metadata (docstring included) of `ipd.Audio` onto this function, so
  # a docstring written here would be overwritten.
  #
  # Args:
  #   *args, **kwargs: forwarded unchanged to `ipd.Audio`
  #   label: optional caption printed before the player. It is handled here
  #     and NOT forwarded, because `ipd.Audio` has no `label` parameter and
  #     would raise a TypeError on it
  if label is not None:
    print(label)
  ipd.display(ipd.Audio(*args, **kwargs))

def resize(w=12, h=6):
  """Set the size of the current matplotlib figure.

  Args:
    w: figure width, in inches
    h: figure height, in inches
  """
  current_figure = plt.gcf()
  current_figure.set_size_inches([w, h])

fname = "" #@param {type: "string"}
url = "http://soundbible.com/grab.php?id=2190&type=wav" #@param {type: "string"}
start_time = 1.298 #@param {type: "number"}
time_length = 3 #@param {type: "number"}

# When no local file is specified, the audio is downloaded to a temporary
# file, which is deleted as soon as the audio has been loaded
_fname = fname or "_testaudio.wav"
try:
  if not fname:
    # The timeout prevents the cell from hanging forever on a stalled
    # connection; raise_for_status prevents an HTTP error page from being
    # saved and then passed to the audio decoder
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    with open(_fname, "wb") as f:
      f.write(r.content)
  # sr=None keeps the native sample rate of the file
  x, fs = librosa.load(_fname, sr=None)
finally:
  # Clean up the temporary download even if the download or loading failed.
  # A user-provided file is never deleted
  if not fname and os.path.exists(_fname):
    os.remove(_fname)

# Cut the requested excerpt (times in seconds are converted to samples)
i_0 = int(start_time * fs)
i_1 = i_0 + int(time_length * fs)

x = x[i_0:i_1]
if x.size == 0:
  # Fail early with a clear message, instead of letting the following
  # cells fail on an empty signal
  raise ValueError(
    "The selected excerpt is empty: check start_time and time_length "
    "against the duration of the audio file")

waveshow(x, sr=fs, alpha=.5, zorder=100)
plt.grid()
resize()
play(x, rate=fs)

## Define optimization problem

Define the fixed parameters, which will not be tuned by the optimizer.
We will put a limit on the maximum number of synthesized modes (`max_n_modes=64`) to avoid excessive overfitting.

In [None]:
# Parameters that are held constant during the optimization.
# Keys of the form `sinusoidal_model__<name>` use the double-underscore
# convention for nested parameters (presumably routed to the sinusoidal
# analysis sub-model; confirm in the SAMPLE documentation)
sample_opt_fixed = {
  "max_n_modes": 64,
  "sinusoidal_model__reverse": True,
  "sinusoidal_model__safe_sine_len": 2,
  "sinusoidal_model__overlap": 0.5,
  "sinusoidal_model__frequency_bounds": (50, 20e3),
}

Define the space of the parameters to tune. We will automatically adjust
 - the logarithm of the FFT size
 - the number of sinusoidal peaks per window
 - the threshold for peak detection
 - the minimum trajectory duration

In [None]:
import skopt.space
# Search space: one dimension per tuned parameter.
# The `name` of each dimension is a human-readable label
sample_opt_space = {
  # base-2 logarithm of the FFT size
  "sinusoidal_model__log_n": skopt.space.Integer(6, 14, name="log2(n)"),
  # number of sinusoidal peaks per window
  "sinusoidal_model__max_n_sines": skopt.space.Integer(16, 128, name="n sines"),
  # threshold for peak detection
  "sinusoidal_model__peak_threshold": skopt.space.Real(-120, -30, name="peak threshold"),
  # minimum trajectory duration
  "sinusoidal_model__min_sine_dur": skopt.space.Real(0, 0.5, name="min duration"),
}

We will use the cochleagram to define an objective function.

In [None]:
from sample.evaluation.metrics import CochleagramLoss
from sample.utils.dsp import complex2db
from functools import partial

# Postprocessing applied to the cochleagram: conversion to decibels
# (presumably floored at -60 dB; confirm in `sample.utils.dsp.complex2db`)
to_db = partial(complex2db, floor=-60, floor_db=True)

# Number of samples in 8 ms at the sample rate of the loaded audio
stride_samples = int(fs * 0.008)

cochleagram_loss = CochleagramLoss(
  fs=fs,
  normalize=True,
  analytical="ir",
  stride=stride_samples,
  postprocessing=to_db,
)

## Optimize
Run the optimization procedure.
Depending on the number of iterations, this could take a couple of minutes or more.

In [None]:
from tqdm.notebook import tqdm_notebook
import sample.optimize

#@markdown Check this to restart the optimization from scratch
reset = True #@param {type:"boolean"}
#@markdown ---
#@markdown Number of optimization iterations
n_minimizing_points = 32 #@param {type:"integer"}
#@markdown Number of exploratory iterations
n_initial_points = 32 #@param {type:"integer"}
#@markdown ---
#@markdown Random seed
seed = 42 #@param {type:"integer"}

# Total evaluation budget: exploratory plus minimizing iterations
n_calls = n_initial_points + n_minimizing_points

# Drop the previous result when a restart is requested, or when this cell
# runs for the first time and no previous result exists yet
if reset or "opt_res" not in locals():
  opt_res = None

# The optimizer receives the fixed parameters, the objective function and
# one keyword argument per dimension of the search space
sample_opt = sample.optimize.SAMPLEOptimizer(
  sample_kw=sample_opt_fixed, loss_fn=cochleagram_loss, **sample_opt_space)

# Progress-bar callback for the notebook
tqdm_cbk = sample.optimize.TqdmCallback(
  sample_opt=sample_opt,
  n_calls=n_calls,
  n_initial_points=n_initial_points,
  tqdm_fn=tqdm_notebook)

# Settings of the minimization. `state` carries the previous result
# (or None, to start from scratch)
gp_minimize_kw = dict(
  n_calls=n_calls,
  n_initial_points=n_initial_points,
  callback=tqdm_cbk,
  initial_point_generator="lhs",
  acq_func="LCB",
  state=opt_res,
  random_state=seed,
)
opt_model, opt_res = sample_opt.gp_minimize(x=x, fs=fs, **gp_minimize_kw)

### Listen back
Listen to an additive resynthesis of the sound based on the estimated modal parameters.
You can change the number of synthesized modes.

In [None]:
from sample.psycho import cochleagram, hz2cams
from sample.plots import tf_plot

#@markdown Number of modes for resynthesis
n_modes = 8 #@param {type:"integer"}

# Temporarily change the number of synthesized modes for the resynthesis.
# The previous value is restored even if the synthesis fails, so that the
# model is never left in a modified state
n_modes_old = opt_model.get_params()["max_n_modes"]
opt_model.set_params(max_n_modes=n_modes)
try:
  # Synthesize over the same time axis as the original excerpt and clip
  # the result to [-1, +1]
  x_hat = np.clip(
      opt_model.predict(np.arange(x.size) / fs, phases="random", seed=1), -1, +1)
finally:
  opt_model.set_params(max_n_modes=n_modes_old)

# Top row: overlaid waveforms of the original and the resynthesis
ax = plt.subplot(211)
x_dual = np.array([x, x_hat])
for label, xi in zip(("original", "resynthesis"), x_dual):
  # The label is printed here, and not passed to the audio player, because
  # `ipd.Audio` has no `label` parameter and would raise a TypeError on it
  print(label)
  play(xi, rate=fs)
  waveshow(xi, sr=fs, alpha=.5, zorder=100, label=label, ax=ax)
plt.grid()
plt.legend()

# Cochleagrams of both signals, computed with the filterbank of the loss.
# NOTE(review): `**` unpacking requires a mapping. Confirm that
# `CochleagramLoss.postprocessing` exposes keyword arguments for
# `cochleagram`, rather than the `partial` passed at construction
coch_x, cfreq = cochleagram(x,
                            filterbank=cochleagram_loss.filterbank,
                            **cochleagram_loss.postprocessing)
coch_x_hat, _cfreq_hat = cochleagram(x_hat,
                                     filterbank=cochleagram_loss.filterbank,
                                     **cochleagram_loss.postprocessing)

# Both plots share the same axis limits: the time limits span the excerpt,
# and the frequency limits are the first and last center frequencies of the
# original's cochleagram, converted with `hz2cams`
tlim = (0, x.size / fs)
flim = hz2cams(cfreq[[0, -1]])

# Bottom row: one cochleagram per signal, each sharing the x-axis with the
# previously created axes
for position, coch, cmap, title in (
    (223, coch_x, "Blues", "original"),
    (224, coch_x_hat, "Oranges", "resynthesis"),
):
  ax = plt.subplot(position, sharex=ax)
  tf_plot(coch, ax=ax, tlim=tlim, flim=flim, cmap=cmap)
  ax.set_title(title)

resize(12, 12)