<a href="https://colab.research.google.com/github/NadiaCarvalho/SMC-TimbreLandscape/blob/main/Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Experiment 0

In [None]:
# Mount Google Drive at /content/drive; later cells read their files from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
#@title Auxiliar Functions

%cd /content/drive/MyDrive/AdapExperiments/SMC-RAVE/

import itertools

import bokeh
import librosa
import numpy as np
import torch

from tqdm.notebook import tqdm

def get_audios(a_to_anal, model, normal=True):
  """Load, optionally normalise, encode and reconstruct every audio file.

  :param a_to_anal: iterable of audio file paths.
  :param model: object exposing ``encode(x)`` and being callable on ``x``.
  :param normal: when True the waveform is passed through
    ``librosa.util.normalize`` before encoding; otherwise it is used as loaded.
  :return: dict keyed by position in ``a_to_anal``; each value holds the keys
    'name', 'original_audio', 'sr', 'audio', 'latent space',
    'latent space mean' and 'reconstruction'.
  """
  audios = {}
  for idx, path in enumerate(tqdm(a_to_anal)):
    # Every file is resampled to 48 kHz on load.
    waveform, sample_rate = librosa.load(path, sr=48000)
    entry = {'name': path, 'original_audio': waveform, 'sr': sample_rate}
    entry['audio'] = librosa.util.normalize(waveform) if normal else waveform
    audios[idx] = entry

    # Add two leading singleton axes before handing the signal to the model.
    signal = torch.from_numpy(entry['audio'])[None, None, :]

    with torch.no_grad():
      latent = model.encode(signal)
      entry['latent space'] = latent
      # NOTE(review): mean(0) averages over the first axis; if the encoder keeps
      # the singleton batch axis this is all zeros — confirm the intended axis.
      entry['latent space mean'] = latent - latent.mean(0)
      entry['reconstruction'] = model(signal).squeeze(0).detach().numpy()

  return audios

def get_dimensionality_reduction(algorithm, latent_spaces, n_components=2):
  """Project ``latent_spaces`` onto ``n_components`` dimensions.

  :param algorithm: 'pca', 'tsne', 'mds', 'isomap' or 'lle'; any other value
    falls through to UMAP.
  :param latent_spaces: samples to embed, one row per sample.
  :param n_components: dimensionality of the embedding.
  :return: the array returned by the selected estimator's ``fit_transform``.

  Each backend is imported lazily, only when its branch is selected.
  """
  if algorithm == 'pca':
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler
    # PCA is fitted on standardised features.
    standardized = StandardScaler().fit_transform(latent_spaces)
    return PCA(n_components=n_components).fit_transform(standardized)

  if algorithm == 'tsne':
    from sklearn.manifold import TSNE
    # Perplexity is 30, or one less than the sample count for small inputs.
    perplexity = min(30, len(latent_spaces) - 1)
    embedder = TSNE(n_components=n_components, perplexity=perplexity)
    return embedder.fit_transform(np.asarray(latent_spaces))

  if algorithm == 'mds':
    from sklearn.manifold import MDS
    embedder = MDS(n_components=n_components, normalized_stress="auto")
    return embedder.fit_transform(latent_spaces)

  if algorithm == 'isomap':
    from sklearn.manifold import Isomap
    return Isomap(n_components=n_components).fit_transform(latent_spaces)

  if algorithm == 'lle':
    from sklearn.manifold import LocallyLinearEmbedding
    embedder = LocallyLinearEmbedding(n_components=n_components)
    return embedder.fit_transform(latent_spaces)

  # Fallback for every other algorithm name.
  import umap.umap_ as umap
  return umap.UMAP(n_components=n_components).fit_transform(latent_spaces)

def get_audio_colors_labels(audios, get_colors=True):
  """Flatten the per-file latents into one matrix with one label (and colour) per row.

  :param audios: dict whose values hold a ``'name'`` path string and a
    ``'latent space'`` tensor (as stored by ``get_audios``).
  :param get_colors: when True, also return one colour per row; each file gets
    the next colour of the cycled ``Category20_20`` palette.
  :return: ``(lats, labels, colors)`` if ``get_colors`` else ``(lats, labels)``.
    ``labels`` entries have the form ``'<row index within file>:: <file stem>'``.
  """
  # Drop the leading axis and transpose so that rows can be concatenated on axis 0.
  lats_0 = [a['latent space'].squeeze(0).detach().numpy().T for a in audios.values()]
  # File name without directories and without its 4-character extension.
  # (Joining the path string with '/' first would interleave '/' between every
  # character and leave every label empty.)
  labels_0 = [a['name'].split('/')[-1][:-4] for a in audios.values()]

  lats = np.concatenate(tuple(lats_0), axis=0)
  labels = [f'{j}:: {name}'
            for lat, name in zip(lats_0, labels_0)
            for j in range(lat.shape[0])]

  if get_colors:
    palette = itertools.cycle(bokeh.palettes.Category20_20)
    # next(palette) is evaluated once per file, then repeated for each of its rows.
    colors = [color for lat in lats_0 for color in [next(palette)] * lat.shape[0]]
    return lats, labels, colors
  return lats, labels

/content/drive/MyDrive/AdapExperiments/SMC-RAVE


# Cluster Analysis

In [None]:
sorted(['/'.join(x.split('/')[-2:]) for x in glob.glob('dataset/*/*.wav')])

In [None]:
#@title Cluster Metrics

import numpy as np
import numba as nb

def normalize(x, min, max):
    """Rescale ``x`` so that ``min`` maps to 0 and ``max`` maps to 1."""
    span = max - min
    offset = x - min
    return offset / span

def denormalize(x, min, max):
    """Inverse of ``normalize``: map 0 back to ``min`` and 1 back to ``max``."""
    span = max - min
    scaled = x * span
    return scaled + min

from sklearn.preprocessing import LabelEncoder

# Accepted values for the `method` argument of `diameter`.
DIAMETER_METHODS = ['mean_cluster', 'farthest']
# Accepted values for the `method` argument of `inter_cluster_distances`.
CLUSTER_DISTANCE_METHODS = ['nearest', 'farthest']

def inter_cluster_distances(labels, distances, method='nearest'):
    """Build the matrix of distances between every pair of clusters.

    :param labels: a list containing cluster labels for each of the n elements
    :param distances: an n x n numpy.array containing the pairwise distances between elements
    :param method: `nearest` to use the two closest points of each cluster pair, `farthest` to use the two most distant ones
    :raises ValueError: if `method` is not listed in CLUSTER_DISTANCE_METHODS
    """
    if method not in CLUSTER_DISTANCE_METHODS:
        raise ValueError(
            'method must be one of {}'.format(CLUSTER_DISTANCE_METHODS))

    farthest_by_method = {'nearest': False, 'farthest': True}
    if method not in farthest_by_method:
        # Only reachable if CLUSTER_DISTANCE_METHODS is extended with new names.
        return None
    return __cluster_distances_by_points_2(
        labels, distances, farthest=farthest_by_method[method])

def __cluster_distances_by_points(labels, distances, farthest=False):
    """Loop-based inter-cluster distance matrix.

    :param labels: integer cluster label (0 .. k-1) of each of the n elements
    :param distances: an n x n numpy.array of pairwise distances; only entries
        ``[i, j]`` with ``i < j`` are read
    :param farthest: False keeps the smallest cross-cluster distance per
        cluster pair, True keeps the largest
    :return: a k x k float array, symmetric, with zeros on the diagonal
    """
    n_unique_labels = len(np.unique(labels))
    # Always allocate floats: a fill value of plain 0 would give an integer
    # matrix and truncate every distance stored in `farthest` mode.
    cluster_distances = np.full((n_unique_labels, n_unique_labels),
                                0.0 if farthest else np.inf)

    np.fill_diagonal(cluster_distances, 0)

    n_points = len(labels)
    for i in range(n_points - 1):
        for ii in range(i + 1, n_points):
            first, second = labels[i], labels[ii]
            if first == second:
                continue  # same cluster: not an inter-cluster pair
            candidate = distances[i, ii]
            current = cluster_distances[first, second]
            improves = candidate > current if farthest else candidate < current
            if improves:
                cluster_distances[first, second] = candidate
                cluster_distances[second, first] = candidate
    return cluster_distances


def __cluster_distances_by_points_2(labels, distances, farthest=False):
    """Vectorised inter-cluster distance matrix.

    :param labels: integer cluster label (0 .. k-1) of each of the n elements
    :param distances: an n x n numpy.array of pairwise distances; only the
        strict upper triangle is read
    :param farthest: False keeps the smallest cross-cluster distance per
        cluster pair, True keeps the largest
    :return: a k x k float array, symmetric, with zeros on the diagonal
    """
    labels = np.asarray(labels)
    distances = np.asarray(distances)

    n_unique_labels = len(np.unique(labels))
    # Float matrix in both modes; +inf / 0 are the neutral elements of min / max.
    cluster_distances = np.full((n_unique_labels, n_unique_labels),
                                0.0 if farthest else np.inf)

    # Every unordered pair of points once, keeping only pairs that straddle two clusters.
    rows, cols = np.triu_indices(len(labels), k=1)
    crosses_clusters = labels[rows] != labels[cols]
    rows, cols = rows[crosses_clusters], cols[crosses_clusters]

    first, second = labels[rows], labels[cols]
    pair_distances = distances[rows, cols]

    # ufunc.at applies the reduction for every repeated index; a plain fancy
    # assignment would just keep whichever pair happened to be written last.
    reduce_at = np.maximum.at if farthest else np.minimum.at
    reduce_at(cluster_distances, (first, second), pair_distances)
    reduce_at(cluster_distances, (second, first), pair_distances)

    np.fill_diagonal(cluster_distances, 0)

    return cluster_distances

def diameter(labels, distances, method='farthest'):
    """Calculates cluster diameters

    :param labels: integer cluster labels (0 .. k-1) for each of the n elements; a list or a numpy array
    :param distances: an n x n numpy.array containing the pairwise distances between elements
    :param method: either `mean_cluster` (sum of the within-cluster pairwise distances divided by the number of cluster members) or `farthest` for the distance between the two points furthest from each other
    :return: a numpy array with one diameter per cluster, indexed by label
    :raises ValueError: if `method` is not listed in DIAMETER_METHODS
    """
    if method not in DIAMETER_METHODS:
        raise ValueError('method must be one of {}'.format(DIAMETER_METHODS))

    # Coerce to an array so that `labels == k` below is an element-wise
    # comparison even when a plain list is passed in.
    labels = np.asarray(labels)
    n_points = len(labels)
    diameters = np.zeros(len(np.unique(labels)))
    use_mean = method == 'mean_cluster'

    for i in range(n_points - 1):
        cluster = labels[i]
        for ii in range(i + 1, n_points):
            if labels[ii] != cluster:
                continue  # only pairs inside the same cluster contribute
            if use_mean:
                diameters[cluster] += distances[i, ii]
            elif distances[i, ii] > diameters[cluster]:
                diameters[cluster] = distances[i, ii]

    if use_mean:
        for k in range(len(diameters)):
            diameters[k] /= np.count_nonzero(labels == k)

    return diameters

def dunn(labels, distances, diameter_method='farthest',
         cdist_method='nearest'):
    r"""
    Dunn index for cluster validation (larger is better).

    .. math:: D = \min_{i = 1 \ldots n_c; j = i + 1 \ldots n_c} \left\lbrace \frac{d \left( c_i,c_j \right)}{\max_{k = 1 \ldots n_c} \left(diam \left(c_k \right) \right)} \right\rbrace

    where :math:`d(c_i,c_j)` represents the distance between
    clusters :math:`c_i` and :math:`c_j`, and :math:`diam(c_k)` is the diameter of cluster :math:`c_k`.

    Inter-cluster distance can be defined in many ways, such as the distance between cluster centroids or between their closest elements.
    Cluster diameter can be defined as the mean distance between all elements in the cluster, between all elements to the cluster centroid, or as the distance between the two furthest elements.

    The higher the value of the resulting Dunn index, the better the clustering
    result is considered, since higher values indicate that clusters are
    compact (small :math:`diam(c_k)`) and far apart (large :math:`d \left( c_i,c_j \right)`).

    :param labels: a list containing cluster labels for each of the n elements
    :param distances: an n x n numpy.array containing the pairwise distances between elements
    :param diameter_method: see :py:func:`diameter` `method` parameter
    :param cdist_method: see :py:func:`inter_cluster_distances` `method` parameter
    :return: smallest non-zero inter-cluster distance divided by the largest cluster diameter
    :raises ValueError: if either method name is rejected, or if no non-zero inter-cluster distance exists

    .. [Kovacs2005] Kovács, F., Legány, C., & Babos, A. (2005). Cluster validity measurement techniques. 6th International Symposium of Hungarian Researchers on Computational Intelligence.
    """

    # Map arbitrary labels (e.g. strings) onto the integers 0 .. k-1 that the helpers index with.
    labels = LabelEncoder().fit(labels).transform(labels)

    ic_distances = inter_cluster_distances(labels, distances, cdist_method)
    # Zero entries (the diagonal: a cluster against itself) are left out of the minimum.
    min_distance = ic_distances[ic_distances.nonzero()].min()
    max_diameter = diameter(labels, distances, diameter_method).max()

    return min_distance / max_diameter

In [None]:
# @title Import Model

import ipywidgets
import glob
import torch

from IPython.display import display, clear_output

coptions=sorted([x.split('/')[-1] for x in glob.glob('RAVE_MODELS/*.ts')])

model_name_content = ipywidgets.Dropdown(
    options=coptions,
    description='Content:',
    value='sol_full.ts'
)

out1 = ipywidgets.Output()
display(out1)

model = None
poss_dims = 128
norm_audios, non_norm_audios = None, None

@ipywidgets.interact
def get_visualizations(
    model_name=model_name_content,
):
  """Load the selected TorchScript model and pre-encode the whole dataset.

  Rebinds the globals ``model``, ``poss_dims``, ``norm_audios`` and
  ``non_norm_audios``.

  :param model_name: file name of the model to load from the RAVE_MODELS folder.
  """
  global model, poss_dims, norm_audios, non_norm_audios
  model = torch.jit.load(f'/content/drive/MyDrive/AdapExperiments/models/RAVE_MODELS/{model_name}')
  print(f'Loaded model {model_name}')

  if '_z' not in model_name:
    # No size in the file name: ask the model, fall back to 128.
    try:
      poss_dims = int(model.latent_size)
    except Exception as e:
      print(e)
      poss_dims = 128
  else:
    # The latent size is the number between '_z' and '.ts' in the file name.
    poss_dims = int(model_name.split('_z')[1].split('.ts')[0])

  print(f'Latent Size: {poss_dims}')

  import glob

  def dataset_paths():
    # Sorted '<folder>/<file>.wav' names, re-prefixed with the dataset directory.
    relative = sorted(['/'.join(p.split('/')[-2:]) for p in glob.glob('dataset/*/*.wav')])
    return [f'dataset/{name}' for name in relative]

  norm_audios = get_audios(dataset_paths(), model, normal=True)
  non_norm_audios = get_audios(dataset_paths(), model, normal=False)


# VCTK: pitch is captured very low
# Isis: seems to capture pitch
# Musicnet: captures pitch but more noise than ISIS
# Sol (ordinario fast seems to capture better)

Output()

interactive(children=(Dropdown(description='Content:', index=9, options=('VCTK.ts', 'birds_pluma_b2048_r48000_…

In [None]:
# Layer by layer, or pairs of layers

# Timbre with normalized dynamics (no subtone, no multiphonics) or with one specific dynamic
# Long notes without vibrato -> with the original dynamics (see where dynamics and pitch matter most)
# Dynamics and pitches across several timbres -> layers

# In the layers that best separate dynamics, timbres and pitches, how do the clusters relate to each other?

In [None]:
#@title Create Clusters

import glob
import ipywidgets
import time

from tqdm.notebook import tqdm

coptions = sorted(['/'.join(x.split('/')[-2:]) for x in glob.glob('dataset/*/*.wav')])

techniques = ipywidgets.SelectMultiple(
    options=set([x.split('/')[0] for x in coptions]),
    description='Techniques:',
    layout=ipywidgets.Layout(width='20%', height='100px'),
)

pitch_range = ipywidgets.SelectMultiple(
    options=['Altissimo', 'High', 'Medium', 'Low'],
    description='Pitch:',
    layout=ipywidgets.Layout(width='20%', height='100px'),
)

dynamics = ipywidgets.SelectMultiple(
    options=['VerySoft', 'Soft', 'Medium', 'Loud', 'VeryLoud'],
    description='Dynamic:',
    layout=ipywidgets.Layout(width='20%', height='100px'),
)

content = ipywidgets.SelectMultiple(
    options=coptions,
    description='Content:',
    layout=ipywidgets.Layout(width='30%', height='100px'),
    index=[0,1]
)

vis = ipywidgets.Dropdown(
    options=['mds', 'lle', 'pca', 'None'],
    description='Dimensionality Reduction Algorithm:',
    layout=ipywidgets.Layout(width='20%', height='100px'),
    value='None',
)

cluster_labels = ipywidgets.Select(
    options=['Technique', 'Range', 'Dynamic', 'All'],
    description='To Cluster:',
    layout=ipywidgets.Layout(width='20%', height='100px'),
    index=0
)

normalized = ipywidgets.Checkbox(
    value=0,
    description='Use Normalization'
)

box = ipywidgets.HBox([content, vis, cluster_labels, normalized])
display(box)

print('Filters')
@ipywidgets.interact
def filter_selected(techniques=techniques, pitch_ranges=pitch_range, dynamics=dynamics):
  """Select in ``content`` every file whose name matches at least one active filter.

  A file matches when its name contains a chosen technique, a chosen pitch
  range, or a chosen dynamic preceded by '-'.
  """
  def matches(name):
    if any(technique in name for technique in techniques):
      return True
    if any(pitch in name for pitch in pitch_ranges):
      return True
    return any(f'-{dynamic}' in name for dynamic in dynamics)

  content.index = [position for position, name in enumerate(coptions) if matches(name)]

unselect = ipywidgets.Button(description='Clean Filters')
display(unselect)

def deselect(b):
  """Button callback: empty the technique, pitch-range and dynamic selectors.

  :param b: the clicked button (unused).
  """
  techniques.value=()
  pitch_range.value=()
  dynamics.value=()

unselect.on_click(deselect)



start = ipywidgets.Button(
    description='Start',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
print()
display(start)
print()


out = ipywidgets.Output(layout={'border': '0px solid black', 'padding': '.5em', 'width': '90%'})

audios = {}

def on_start_clicked(b):
  """Button callback: score how well the chosen labelling clusters the latent frames.

  Uses the pre-encoded dataset (``norm_audios`` or ``non_norm_audios``,
  depending on the checkbox), optionally reduces the latent frames to 2-D,
  groups the frames by technique / register / dynamic / file and prints the
  Silhouette, Davies-Bouldin, Calinski-Harabasz and Dunn scores into ``out``.

  :param b: the clicked button (unused).
  :raises ValueError: if the labelling yields fewer than two clusters.
  """
  global audios, model

  def get_register(label):
    # Register names are matched together with their trailing '-'.
    for register_name in ('Altissimo', 'High', 'Medium', 'Low'):
      if f'{register_name}-' in label:
        return register_name
    return None

  def get_dynamic(label):
    # The 'Very*' tokens are tested first because they contain the plain ones.
    for token, dynamic_name in (('VerySoft', 'Very Soft'), ('Soft', 'Soft'),
                                ('VeryLoud', 'Very Loud'), ('Loud', 'Loud')):
      if token in label:
        return dynamic_name
    return 'Medium'

  with out:
    out.clear_output(wait=False)
    time.sleep(.5)

    print(f'Generating Latent Space with model {model_name_content.value}')

    # The latents were computed when the model was loaded; just pick the variant.
    audios = norm_audios if normalized.value else non_norm_audios

    lats_0 = [a['latent space'].squeeze(0).detach().numpy().T for a in audios.values()]
    labels_0 = ['/'.join(a['name'].split('/')[-2:])[:-4] for a in audios.values()]

    lats = np.concatenate(tuple(lats_0), axis=0)
    # One label per row of `lats`: the file label repeated for each of its rows.
    labels = [name for lat, name in zip(lats_0, labels_0) for _ in range(lat.shape[0])]

    if vis.value != 'None':
      print(vis.value)
      points = get_dimensionality_reduction(vis.value, lats, 2)
    else:
      points = lats

    cluster_l = labels
    if cluster_labels.value == 'Technique':
      cluster_l = [l.split('/')[0] for l in labels]
    elif cluster_labels.value == 'Range':
      cluster_l = [get_register(l) for l in labels]
    elif cluster_labels.value == 'Dynamic':
      cluster_l = [get_dynamic(l) for l in labels]

    # Rows whose label is None (no register recognised) are left out.
    clusters = {label: np.asarray([l for i, l in enumerate(points) if cluster_l[i]==label]) for label in set(cluster_l) if label is not None}

    if len(clusters) < 2:
      raise ValueError("Number of clusters has to be at least 2")

    print(f'Analysing Clusters {list(clusters.keys())}\n\n')

    X_label = [label for label, cluster in clusters.items() for _ in cluster]
    X = [cl for cluster in clusters.values() for cl in cluster]

    print(f'NP: {len(X_label)}')

    from sklearn import metrics

    print('Silhouette Score:', metrics.silhouette_score(X, X_label, metric='euclidean'), '\n')
    print('Davies Bouldin Score:', metrics.davies_bouldin_score(X, X_label), '\n')
    print('Calinski Harabasz Score:', metrics.calinski_harabasz_score(X, X_label), '\n')

    euclidean_distances = metrics.pairwise.euclidean_distances(X)
    for diameter_method in DIAMETER_METHODS:
      for cdist_method in CLUSTER_DISTANCE_METHODS:
        # Best effort: one failing variant must not hide the others, but say why it failed.
        try:
          score = dunn(X_label, euclidean_distances, diameter_method, cdist_method)
        except Exception as err:
          print(f'Dunn Score ({diameter_method}, {cdist_method}) failed: {err}', '\n')
        else:
          print(f'Dunn Score ({diameter_method}, {cdist_method})', score, '\n')
    print('End')

start.on_click(on_start_clicked)
display(out)
print()

In [None]:
# @title Cluster by Layer { vertical-output: true, display-mode: "code" }
TO_CLUSTER = "All" # @param ["Technique", "Register", "Dynamic", "All"]
layer_pair = True # @param {type:"boolean"}

import pandas as pd
from sklearn import metrics
from tqdm.notebook import tqdm


def get_register(label):
  """Return the register named in ``label`` ('Altissimo', 'High', 'Medium' or 'Low').

  A register only counts when it is directly followed by '-'; the names are
  tried in the order above and ``None`` is returned when none is present.
  """
  for register_name in ('Altissimo', 'High', 'Medium', 'Low'):
    if f'{register_name}-' in label:
      return register_name
  return None

def get_dynamic(label):
  """Return the dynamic named in ``label``, defaulting to 'Medium'.

  The 'Very*' tokens are tested before the plain ones because they contain them.
  """
  token_to_dynamic = (
    ('VerySoft', 'Very Soft'),
    ('Soft', 'Soft'),
    ('VeryLoud', 'Very Loud'),
    ('Loud', 'Loud'),
  )
  for token, dynamic_name in token_to_dynamic:
    if token in label:
      return dynamic_name
  return 'Medium'

# Technique and Register are analysed on the normalised encodings; every
# other choice uses the encodings of the audio as loaded.
if TO_CLUSTER in ['Technique','Register']:
  audios = norm_audios
else:
  audios = non_norm_audios

# These three settings are only referenced inside the string-quoted
# (disabled) blocks below.
register = 'All_NN_R'
no_slap = False
only_medium = False

lats_0 = [a['latent space'].squeeze(0).detach().numpy().T for a in audios.values()]
# '<folder>/<file>' without the 4-character extension.
labels_0 = ['/'.join(a['name'].split('/')[-2:])[:-4] for a in audios.values()]

# No filtering is active: every file is kept.
filtered_labels = labels_0
#filtered_labels = [lab for lab in labels_0 if lab.split('/')[0] in ['Long NON Vibrato']
"""
filtered_labels = [lab for lab in labels_0 if register in lab]
if no_slap:
  filtered_labels = [lab for lab in filtered_labels if 'Slap' not in lab]
if only_medium:
  filtered_labels = [lab for lab in filtered_labels
                    if not (any(x in lab for x in
                      ['Long NON Vibrato', 'Long Vibrato', 'Quarter-Tones'])
                    and any(x in lab for x in ['Very', 'Soft', 'Loud']))]
"""
#results = pd.DataFrame()
results = []
# Either every single latent dimension, or every pair of consecutive dimensions.
ls = list(range(poss_dims))
if layer_pair:
  ls = list(zip(list(range(poss_dims))[:-1], list(range(poss_dims))[1:]))

for layer in tqdm(ls):
  # Column(s) `layer` of each kept file's latent matrix.
  layer_val = [lat[:,layer] for i, lat in enumerate(lats_0) if labels_0[i] in filtered_labels]

  # One cluster label per kept file, derived from its name.
  cluster_l = filtered_labels
  if TO_CLUSTER == 'Technique':
    cluster_l = [l.split('/')[0] for l in filtered_labels]
  elif TO_CLUSTER == 'Register':
    cluster_l = [get_register(l) for l in filtered_labels]
  elif TO_CLUSTER == 'Dynamic':
    cluster_l = [get_dynamic(l) for l in filtered_labels]

  # Group the per-file arrays by cluster label; files labelled None are dropped.
  clusters = {label: [l for i, l in enumerate(layer_val) if cluster_l[i] == label] for label in set(cluster_l) if label is not None}

  # Repeat each cluster label once per row of the file arrays it contains.
  X_label = np.concatenate([[label]*len(cl) for label, cluster in clusters.items() for cl in cluster])
  if isinstance(layer, int):
    # Single dimension: flatten and turn into one column.
    X = np.concatenate([cl.reshape(-1) for cluster in clusters.values() for cl in cluster]).reshape(-1, 1)
  else:
    X = np.concatenate([cl for cluster in clusters.values() for cl in cluster])

  #results.loc['Silhouette Score', f'Layer {layer}'] = metrics.silhouette_score(X, X_label, metric='euclidean')
  #results.loc['Davies Bouldin Score', f'Layer {layer}'] =
  #results.loc['Calinski Harabasz Score', f'Layer {layer}'] = metrics.calinski_harabasz_score(X, X_label)
  #results.loc['Dunn Index', f'Layer {layer}'] = metrics.davies_bouldin_score(X, X_label)

  #euclidean_distances = metrics.pairwise.euclidean_distances(X)

  #print(f'Layer {layer}')
  #print(f'DB: {str(metrics.davies_bouldin_score(X, X_label)).replace(".", ",")}')
  #print(f'DI: {dunn(X_label, euclidean_distances, "mean_cluster", "nearest")}')
  # Only the Davies-Bouldin score is recorded per layer.
  results.append(metrics.davies_bouldin_score(X, X_label))
  """
  for diameter_method in DIAMETER_METHODS:
    for cdist_method in CLUSTER_DISTANCE_METHODS:
      try:
        results.loc[f'Dunn Score ({diameter_method}, {cdist_method})', f'Layer {layer}'] = dunn(
            X_label, euclidean_distances, diameter_method, cdist_method)
      except:
        results.loc[f'Dunn Score ({diameter_method}, {cdist_method})', f'Layer {layer}'] = np.nan

  results.to_excel(f'model_{model_name_content.value}_{register}{"_no_slap" if no_slap else ""}.xlsx')
  """
print(np.asarray(results))

  0%|          | 0/15 [00:00<?, ?it/s]

[29.5345473  27.86337799 24.56714152 37.59288623 23.35306739 23.97376115
 47.40069701 24.53217909 47.34480007 54.45070657 28.76367708 51.78506773
 39.51376639 48.39666669 34.71334664]


In [None]:
results

Unnamed: 0,Layer 0,Layer 1,Layer 2,Layer 3,Layer 4,Layer 5,Layer 6,Layer 7
Silhouette Score,-0.225291,-0.200476,-0.200089,-0.297904,-0.178885,-0.199302,-0.092261,-0.087111
Davies Bouldin Score,39.323114,13.989399,13.433981,21.57969,18.76921,11.912675,542.76058,685.641957
Calinski Harabasz Score,20.869392,56.551662,116.396995,55.538727,47.264137,153.769157,1.09411,0.732654
"Dunn Score (mean_cluster, nearest)",0.000225,0.00021,0.000161,7.9e-05,0.000157,0.000283,0.000108,0.000453
"Dunn Score (mean_cluster, farthest)",-0.011772,-0.00412,-0.014487,-0.005336,-0.010207,-0.010607,-0.007234,-0.011552
"Dunn Score (farthest, nearest)",0.00993,0.007935,0.004524,0.005077,0.006079,0.011847,0.004347,0.016663
"Dunn Score (farthest, farthest)",-0.519137,-0.155444,-0.406333,-0.342409,-0.394477,-0.444426,-0.291323,-0.424791


# Visualizations

In [None]:
#@title Visualization Functions

import bokeh
import bokeh.io
import bokeh.plotting
import bokeh.models

bokeh.io.output_notebook()

def get_bokeh_source(predictions, labels, colors, radius=.01):
  """Assemble the column dict consumed by the scatter plot.

  :param predictions: 2-D array; column 0 becomes 'x', column 1 becomes 'y'.
  :param labels: strings of the form '<id>::<label>'.
  :param colors: one colour per point, stored unchanged.
  :param radius: radius repeated for every point.
  :return: dict with the keys 'x', 'y', 'label', 'id', 'color', 'alpha', 'radius'.
  """
  n_points = len(labels)
  pieces = [entry.split('::') for entry in labels]

  source = {}
  source['x'] = predictions[:,0]
  source['y'] = predictions[:,1]
  source['label'] = [parts[1] for parts in pieces]
  source['id'] = [parts[0] for parts in pieces]
  source['color'] = colors
  source['alpha'] = [.7] * n_points
  source['radius'] = [radius] * n_points
  return source

def print_visualization_2d(source):
  """Build (without showing) a Bokeh scatter figure over ``source``.

  :param source: data source providing the columns 'x', 'y', 'radius',
    'color', 'alpha', 'label' and 'id'.
  :return: the configured figure.
  """
  # The tooltip shows the label and id columns of the hovered point.
  tooltip_rows = [
    ('Label', '@label'),
    ('ID', '@id'),
    ('', '----')
  ]
  hover = bokeh.models.HoverTool(tooltips=tooltip_rows)

  tool_list = ['box_select', 'lasso_select', 'poly_select',
               'tap', 'reset', 'zoom_in', 'zoom_out', 'help',
               'pan', 'wheel_zoom', 'box_zoom', 'save', hover]
  p = bokeh.plotting.figure(toolbar_location='below', tools=tool_list)

  # Every visual property is read per point from the source columns.
  glyph_columns = dict(x='x', y='y', radius='radius', color='color', alpha='alpha')
  p.circle(source=source, **glyph_columns)

  return p


def get_plot_dim(audios, dimension=[0,1]):
  """Plot data for two raw latent dimensions of ``audios``.

  :param audios: dict as accepted by ``get_audio_colors_labels``.
  :param dimension: the two latent columns to use as x and y.
  :return: the dict produced by ``get_bokeh_source``.
  """
  lats, labels, colors = get_audio_colors_labels(audios)
  points = lats[:, dimension]
  n_points = points.shape[0]
  extent = points.max(0) - points.min(0)
  # Marker radius: smallest axis extent divided by the point count
  # (by half the count once there are more than 100 points).
  divisor = (n_points / 2) if n_points > 100 else n_points
  return get_bokeh_source(points, labels, colors, float(extent.min()/divisor))

def get_plot(audios, algorithm='lle'):
  """Plot data for a 2-D embedding of all latent rows of ``audios``.

  :param audios: dict as accepted by ``get_audio_colors_labels``.
  :param algorithm: name passed to ``get_dimensionality_reduction``.
  :return: the dict produced by ``get_bokeh_source``.
  """
  lats, labels, colors = get_audio_colors_labels(audios)
  points = get_dimensionality_reduction(algorithm, lats, 2)
  n_points = points.shape[0]
  extent = points.max(0) - points.min(0)
  # Marker radius: smallest axis extent divided by the point count
  # (by half the count once there are more than 100 points).
  divisor = (n_points / 2) if n_points > 100 else n_points
  return get_bokeh_source(points, labels, colors, float(extent.min()/divisor))

In [None]:
# @title Import Model

import ipywidgets
import glob
import torch

from IPython.display import display, clear_output

coptions=sorted([x.split('/')[-1] for x in glob.glob('RAVE_MODELS/*.ts')])

model_name_content = ipywidgets.Dropdown(
    options=coptions,
    description='Content:',
)

out1 = ipywidgets.Output()
display(out1)

model = None
poss_dims = 128

@ipywidgets.interact
def get_visualizations(
    model_name=model_name_content,
):
  """Load the selected TorchScript model and work out its latent size.

  Rebinds the globals ``model`` and ``poss_dims``.

  :param model_name: file name of the model to load from the RAVE_MODELS folder.
  """
  global model, poss_dims
  model = torch.jit.load(f'/content/drive/MyDrive/AdapExperiments/models/RAVE_MODELS/{model_name}')
  print(f'Loaded model {model_name}')

  if '_z' not in model_name:
    # No size in the file name: ask the model, fall back to 128.
    try:
      poss_dims = int(model.latent_size)
    except Exception as e:
      print(e)
      poss_dims = 128
  else:
    # The latent size is the number between '_z' and '.ts' in the file name.
    poss_dims = int(model_name.split('_z')[1].split('.ts')[0])

  print(f'Latent Size: {poss_dims}')

# VCTK: pitch is captured very low
# Isis: seems to capture pitch
# Musicnet: captures pitch but more noise than ISIS
# Sol (ordinario fast seems to capture better)

Output()

interactive(children=(Dropdown(description='Content:', options=('VCTK.ts', 'birds_pluma_b2048_r48000_z12.ts', …

In [None]:
#@title Show All Latent Space

import ipywidgets

import itertools
import glob
import time

import bokeh
import torch
import librosa
import numpy as np

from IPython.display import display, clear_output
from tqdm.notebook import tqdm

from bokeh.io import curdoc

bokeh.io.output_notebook()

coptions=sorted(['/'.join(x.split('/')[-2:]) for x in glob.glob('dataset/*/*.wav')])

content = ipywidgets.SelectMultiple(
    options=coptions,
    description='Content:',
    layout=ipywidgets.Layout(width='40%', height='100px'),
    index=[0,1]
)

vis = ipywidgets.Dropdown(
    options=['mds', 'lle', 'pca'],
    description='Visualization\n Algorithm:',
    layout=ipywidgets.Layout(width='40%', height='100px')
)

box = ipywidgets.HBox([content, vis])
display(box)

start = ipywidgets.Button(
    description='Start',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
print()
display(start)
print()

out = ipywidgets.Output(layout={'border': '0px solid black', 'padding': '.5em', 'width': '90%'})

audios = {}
handle = None
graph = None
source = None

def on_start_clicked(b):
  """Button callback: encode the selected files and draw or refresh the 2-D latent plot.

  :param b: the clicked button (unused).
  """
  global audios, handle, graph, source

  with out:
    out.clear_output(wait=False)
    time.sleep(1)

    print(f'Generating Latent Space with model {model_name_content.value}')

    audios = get_audios([f'dataset/{name}' for name in content.value], model)

  # A handle exists after the first click: replace the data of the existing
  # source and push the change. Otherwise build the source and figure and show them.
  if handle is not None:
    source.data = get_plot(audios, vis.value)
    bokeh.io.push_notebook(handle=handle)
  else:
    source = bokeh.models.ColumnDataSource(get_plot(audios, vis.value))
    graph = print_visualization_2d(source)
    handle = bokeh.io.show(graph, notebook_handle=True)
    display(handle)

start.on_click(on_start_clicked)
display(out)
print()

In [None]:
#@title Visualize each Latent Dimension

import ipywidgets

import itertools
import glob
import time

import bokeh
import torch
import librosa
import numpy as np

from IPython.display import display, clear_output
from tqdm.notebook import tqdm

from bokeh.io import curdoc

bokeh.io.output_notebook()

coptions=sorted(['/'.join(x.split('/')[-2:]) for x in glob.glob('dataset/*/*.wav')])

content2 = ipywidgets.SelectMultiple(
    options=coptions,
    description='Content:',
    layout=ipywidgets.Layout(width='60%', height='100px'),
    index=[0,1]
)

vis1 = ipywidgets.Select(
    options=list(range(0, poss_dims, 1)),
    description='Dim x:',
    layout=ipywidgets.Layout(width='120%', height='45px'),
    index=0
)
vis2 = ipywidgets.Select(
    options=list(range(0, poss_dims, 1)),
    description='Dim y:',
    layout=ipywidgets.Layout(width='120%', height='45px'),
    index=1
)
start2 = ipywidgets.Button(
    description='Start',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
out2 = ipywidgets.Output(layout={'border': '0px solid black', 'padding': '.5em', 'width': '90%'})

audios = {}
handle2 = None
graph2 = None
source2 = None

def on_start2_clicked(b):
  """Button callback: encode the selected files and plot two raw latent dimensions.

  Reads the file selection from ``content2`` and the two dimensions from
  ``vis1`` / ``vis2``; rebinds the globals ``audios``, ``handle2``,
  ``graph2`` and ``source2``.

  :param b: the clicked button (unused).
  """
  global audios, handle2, graph2, source2

  with out2:
    out2.clear_output(wait=False)
    time.sleep(1)

    print(f'Generating Latent Space with model {model_name_content.value}')

    # Use this cell's own selector: `content` belongs to other cells and is
    # rebound to a single-value Select further down the notebook.
    audios = get_audios([f'dataset/{name}' for name in content2.value], model)

  dims = [vis1.value, vis2.value]
  # A handle exists after the first click: replace the data of the existing
  # source and push the change. Otherwise build the source and figure and show them.
  if handle2 is not None:
    source2.data = get_plot_dim(audios, dims)
    bokeh.io.push_notebook(handle=handle2)
  else:
    source2 = bokeh.models.ColumnDataSource(get_plot_dim(audios, dims))
    graph2 = print_visualization_2d(source2)
    handle2 = bokeh.io.show(graph2, notebook_handle=True)
    display(handle2)

start2.on_click(on_start2_clicked)

# Displays: show the selector that the callback reads.
display(ipywidgets.HBox([content2, ipywidgets.VBox([vis1, vis2])]))
print()
display(start2)
print()
display(out2)
print()

In [None]:
# @title Listen to Audios
import IPython.display as ipd

#audios_all = get_audios([f'dataset/{name}' for name in sorted(['/'.join(x.split('/')[-2:]])])
audios_all = non_norm_audios

# NOTE(review): this IntSlider is not displayed before the name is rebound to
# the Select widget just below, so it appears to be unused.
audio_to_listen = ipywidgets.IntSlider(
    value=0,
    min=0,
    max=len(audios_all.keys())-1,
    step=1,
    description='Audio to Listen:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    layout=ipywidgets.Layout(width='60%', height='80px'),
)

# One option per entry of audios_all: shown as the file name, valued by its dict key.
audio_to_listen = ipywidgets.Select(
  options=[(x['name'], i) for i, x in audios_all.items()],
  description='Audio',
  layout=ipywidgets.Layout(width='120%', height='45px'),
)

@ipywidgets.interact
def visualize_audios(ind=audio_to_listen):
  """Print the name of entry ``ind`` and show three audio players for it.

  The players are, in order: the audio as loaded ('original_audio'), the
  audio that was encoded ('audio') and the model output ('reconstruction').

  :param ind: key into ``audios_all``.
  """
  print(f"\n{audios_all[ind]['name']}\n")
  display(ipd.Audio(audios_all[ind]['original_audio'], rate=audios_all[ind]['sr']))
  display(ipd.Audio(audios_all[ind]['audio'], rate=audios_all[ind]['sr']))
  display(ipd.Audio(audios_all[ind]['reconstruction'], rate=audios_all[ind]['sr']))

In [None]:
# @title View and Listen to Layers

%matplotlib inline

import glob
import ipywidgets
import time

import IPython.display as ipd
from IPython.display import display

import librosa
import matplotlib.pyplot as plt

moptions=sorted([x.split('/')[-1] for x in glob.glob('RAVE_MODELS/*.ts')])
model_name_content = ipywidgets.Dropdown(
    options=moptions,
    description='Content:',
    value=moptions[-1]
)

coptions=sorted(['/'.join(x.split('/')[-2:]) for x in glob.glob('dataset/*/*.wav')])
content = ipywidgets.Select(
    options=coptions,
    description='Content:',
    layout=ipywidgets.Layout(width='40%', height='100px'),
    index=0
)

box = ipywidgets.HBox([model_name_content, content])
display(box)

start = ipywidgets.Button(
    description='Start',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
print()
display(start)
print()

out = ipywidgets.Output(layout={'border': '0px solid black', 'padding': '.5em', 'width': '90%'})

model = None
poss_dims = 128

def get_tab(name, audio, sr):
  """Render a title, a dB spectrogram and an audio player into a new Output widget.

  :param name: object displayed at the top of the panel.
  :param audio: waveform passed to ``librosa.stft`` and to the audio player.
  :param sr: sample rate used for the time axis and for playback.
  :return: the ``ipywidgets.Output`` holding the rendered content.
  """
  panel_layout = {'border': '0px solid black', 'padding': '.5em', 'width': '90%'}
  panel = ipywidgets.Output(layout=panel_layout)

  with panel:
    display(name)

    fig, ax = plt.subplots()

    # STFT magnitude in dB relative to its maximum, on a linear frequency axis.
    magnitude = np.abs(librosa.stft(audio))
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    img2 = librosa.display.specshow(magnitude_db, y_axis='linear', x_axis='time', sr=sr, ax=ax)
    fig.colorbar(img2, ax=ax)

    plt.show(fig)

    display(ipd.Audio(audio, rate=sr))

  return panel

def get_audio_specific_layer(layer, latent, sr=48000):
  """Decode ``latent`` with only one column kept (or all of them) and render it as a tab.

  :param layer: 'All' to decode the latent unchanged, otherwise the index of
    the single column to keep; every other column is set to zero.
  :param latent: 2-D numpy array; it is transposed before decoding.
  :param sr: sample rate handed to ``get_tab``.
  :return: the Output widget built by ``get_tab``, titled 'Layer <layer>'.
  """
  if layer != 'All':
    # Zero everything except the requested column.
    lat_layer = np.zeros_like(latent)
    lat_layer[:, layer] = latent[:, layer]
  else:
    lat_layer = latent

  decoder_input = torch.tensor(lat_layer.T).unsqueeze(0)
  with torch.no_grad():
    rec = model.decode(decoder_input)

  audio = rec.squeeze(0).detach().numpy()
  title = f'Layer {layer}'
  return get_tab(title, audio.squeeze(0), sr)

def on_start_clicked(b):
  """Button callback: load the chosen model, encode one file and show one tab per latent layer.

  Rebinds the globals ``model`` and ``poss_dims``. The tabs are: the
  original audio, the full reconstruction, the decoding of the complete
  latent ('All') and the decoding of each single latent dimension.

  :param b: the clicked button (unused).
  """
  global model, poss_dims

  model_name = model_name_content.value

  with out:
    out.clear_output(wait=False)
    time.sleep(1)

    model = torch.jit.load(f'/content/drive/MyDrive/AdapExperiments/models/RAVE_MODELS/{model_name}')
    print(f'Loaded model {model_name}')

    # Latent size: the number between '_z' and '.ts' in the file name if present,
    # otherwise the model's latent_size attribute, otherwise 128.
    if '_z' in model_name:
      poss_dims = int(model_name.split('_z')[1].split('.ts')[0])
    else:
      try:
        poss_dims = int(model.latent_size)
      except Exception as e:
        print(e)
        poss_dims = 128

    print(f'Calculating latent space for  {poss_dims}')

    audio, sr = librosa.load(f'dataset/{content.value}', sr=48000)

    # Add two leading singleton axes before handing the signal to the model.
    x = torch.from_numpy(audio)
    x = x[None, None, :]

    with torch.no_grad():
      z = model.encode(x)
      reconst = model(x).squeeze(0).detach().numpy()

    # Drop the leading axis and transpose, the layout get_audio_specific_layer indexes by column.
    lat = z.squeeze(0).detach().numpy().T

    print('Decoding Layers...')
    layers = ['All'] + list(range(poss_dims))
    tabs = ipywidgets.Tab([get_tab('Original', audio, sr), get_tab('Reconst', reconst.squeeze(0), sr)] + [get_audio_specific_layer(layer, lat) for layer in layers])
    # Tab titles follow the same order as the children built above.
    _ = [tabs.set_title(i, f'LS_{str(l)}') for i, l in enumerate(['Original', 'Reconstruction'] + layers)]

    # Replace the progress messages with the finished tab widget.
    out.clear_output(wait=False)
    display(tabs)

start.on_click(on_start_clicked)
display(out)
print()

HBox(children=(Dropdown(description='Content:', index=13, options=('VCTK.ts', 'birds_pluma_b2048_r48000_z12.ts…




Button(description='Start', icon='check', style=ButtonStyle(), tooltip='Click me')




Output(layout=Layout(border='0px solid black', padding='.5em', width='90%'))


