In [56]:
import glob

# Root location (a gs:// URL) under which the pre-trained MusicVAE checkpoints
# live; the model-definition cell appends '/checkpoints/<name>.ckpt' to it.
BASE_DIR = "gs://download.magenta.tensorflow.org/models/music_vae/colab2"

print('Installing dependencies...')
# System packages: the fluidsynth shared library, a General MIDI soundfont,
# and the build/audio headers listed below (presumably needed to build the
# Python audio dependencies -- confirm).
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
# Python bindings for fluidsynth.
!pip install -q pyfluidsynth
# -U upgrades magenta to the newest release available at run time.
# NOTE(review): neither pip install is version-pinned, so a fresh run may pull
# different releases than the ones that produced the saved outputs.
!pip install -qU magenta



# Hack to allow python to pick up the newly-installed fluidsynth lib.
# This is only needed for the hosted Colab environment.
import ctypes.util

# Capture the real lookup function exactly once. If this cell is re-run,
# ctypes.util.find_library is already our proxy; unwrap it through the
# attribute set below instead of capturing the proxy itself, which would make
# every non-fluidsynth lookup recurse until RecursionError.
orig_ctypes_util_find_library = getattr(
    ctypes.util.find_library, '_wrapped_find_library',
    ctypes.util.find_library)


def proxy_find_library(lib):
    """Resolves a shared-library name, special-casing fluidsynth.

    Args:
        lib: Library name as passed to `ctypes.util.find_library`.

    Returns:
        'libfluidsynth.so.1' when `lib` is 'fluidsynth'; otherwise whatever
        the original `ctypes.util.find_library` returns for `lib`.
    """
    if lib == 'fluidsynth':
        return 'libfluidsynth.so.1'
    return orig_ctypes_util_find_library(lib)


# Remember the wrapped function on the proxy so a later re-run can find it.
proxy_find_library._wrapped_find_library = orig_ctypes_util_find_library
ctypes.util.find_library = proxy_find_library


print('Importing libraries and defining some helper functions...')
# These imports run after the ctypes.util.find_library patch above so that the
# patched lookup is already in place when the audio libraries load.
# `files` provides the browser download used by download().
from google.colab import files
# `mm` supplies playback, concatenation and MIDI export in the helpers below.
import magenta.music as mm
# `configs.CONFIG_MAP` and `TrainedModel` are used by the model-definition cell.
from magenta.models.music_vae import configs
from magenta.models.music_vae.trained_model import TrainedModel
import numpy as np
import os
import tensorflow.compat.v1 as tf
# `note_seq` supplies the plotting used by plot().
# NOTE(review): both `magenta.music` and `note_seq` are used for sequence
# helpers in this notebook; consider consolidating on one.
import note_seq

# Switch TensorFlow to its v1-style behavior (presumably required by the
# MusicVAE TrainedModel code -- confirm).
tf.disable_v2_behavior()

# Necessary until pyfluidsynth is updated (>1.2.5).
# Note: this filter has no module restriction, so it hides every
# DeprecationWarning raised for the rest of the session.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

def play(note_sequence):
  """Plays `note_sequence` through `mm.play_sequence` using the fluidsynth synth."""
  synthesizer = mm.fluidsynth
  mm.play_sequence(note_sequence, synth=synthesizer)

def plot(note_sequence):
  """Renders `note_sequence` with `note_seq.plot_sequence`."""
  render = note_seq.plot_sequence
  render(note_sequence)

def interpolate(model, start_seq, end_seq, num_steps, max_length=32,
                assert_same_length=True, temperature=0.5,
                individual_duration=4.0):
  """Interpolates between a start and end sequence, then plays and plots it.

  Plays the first, last and middle interpolated sequences, then plays and
  plots all of them concatenated back to back.

  Args:
    model: Object exposing `interpolate(start, end, num_steps=..., length=...,
      temperature=..., assert_same_length=...)` that returns an indexable
      collection of note sequences.
    start_seq: Sequence to interpolate from.
    end_seq: Sequence to interpolate to.
    num_steps: Forwarded to `model.interpolate` as `num_steps`; also used to
      pick the middle sequence (`num_steps // 2`).
    max_length: Forwarded to `model.interpolate` as `length`.
    assert_same_length: Forwarded unchanged to `model.interpolate`.
    temperature: Forwarded unchanged to `model.interpolate`.
    individual_duration: Duration assigned to each sequence when the
      interpolation results are concatenated.

  Returns:
    The concatenated interpolation when `num_steps > 3`; otherwise the middle
    interpolated sequence.
  """
  note_sequences = model.interpolate(
      start_seq, end_seq, num_steps=num_steps, length=max_length,
      temperature=temperature,
      assert_same_length=assert_same_length)

  print('Start Seq Reconstruction')
  play(note_sequences[0])
  print('End Seq Reconstruction')
  play(note_sequences[-1])
  print('Mean Sequence')
  midpoint_seq = note_sequences[num_steps // 2]
  play(midpoint_seq)
  print('Start -> End Interpolation')
  # Every interpolated sequence gets the same slot length in the joined result.
  interp_seq = mm.sequences_lib.concatenate_sequences(
      note_sequences, [individual_duration] * len(note_sequences))
  play(interp_seq)
  # Use the notebook's own plot() helper so plotting goes through the same
  # backend as everywhere else in this file.
  plot(interp_seq)
  return interp_seq if num_steps > 3 else midpoint_seq

def download(note_sequence, filename):
  """Writes `note_sequence` to the MIDI file `filename` and offers it for download."""
  midi_path = filename
  mm.sequence_proto_to_midi_file(note_sequence, midi_path)
  files.download(midi_path)

# Progress marker: printed once the setup cell has run to the end.
print('Done')

In [66]:
# Define Model
%%capture

hierdec_trio_16bar_config = configs.CONFIG_MAP['hierdec-trio_16bar']
model_16bar = TrainedModel(hierdec_trio_16bar_config, batch_size=4, checkpoint_dir_or_path=BASE_DIR + '/checkpoints/trio_16bar_hierdec.ckpt')

INFO:tensorflow:Building MusicVAE model with BidirectionalLstmEncoder, HierarchicalLstmDecoder, and hparams:
{'max_seq_len': 256, 'z_size': 512, 'free_bits': 256, 'max_beta': 0.2, 'beta_rate': 0.0, 'batch_size': 4, 'grad_clip': 1.0, 'clip_mode': 'global_norm', 'grad_norm_clip_to_zero': 10000, 'learning_rate': 0.001, 'decay_rate': 0.9999, 'min_learning_rate': 1e-05, 'conditional': True, 'dec_rnn_size': [1024, 1024], 'enc_rnn_size': [2048, 2048], 'dropout_keep_prob': 1.0, 'sampling_schedule': 'constant', 'sampling_rate': 0.0, 'use_cudnn': False, 'residual_encoder': False, 'residual_decoder': False, 'control_preprocessing_rnn_size': [256]}
INFO:tensorflow:
Encoder Cells (bidirectional):
  units: [2048, 2048]

INFO:tensorflow:
Hierarchical Decoder:
  input length: 256
  level output lengths: [16, 16]

INFO:tensorflow:
Decoder Cells:
  units: [1024, 1024]

INFO:tensorflow:
Decoder Cells:
  units: [1024, 1024]

INFO:tensorflow:
Decoder Cells:
  units: [1024, 1024]

INFO:tensorflow:Restoring 

In [64]:
# --- TrainedModel.sample ---
# arguments: n (how many sequences to draw) and the softmax temperature
# result: a list of note sequences
n, temperature = 1, 1
samples = model_16bar.sample(n=n, temperature=temperature)
print('Number of samples:', len(samples))
print('Sample object type:', type(samples[0]))

# Every note sequence can be drawn with plot() and auditioned with play().
print('')
print('----- Actual samples -----')
for ns in samples:
  plot(ns)
  play(ns)

# --- TrainedModel.encode ---
# argument: a list of NoteSequences
# result: tuple of shape (3, n_sequences, num_latent_dims) whose first
#         dimension is z/mu/sigma; index 1 (mu) is kept here
encoded = model_16bar.encode(samples)
zmu_samples = encoded[1]
print('Shape of encoded vector:', np.shape(zmu_samples))

# --- TrainedModel.decode ---
# argument: a list of latent vectors
# result: a list of note sequences
reconstructed_samples = model_16bar.decode(zmu_samples)
print('Number of samples:', len(reconstructed_samples))
print('Sample object type:', type(reconstructed_samples[0]))
print('')
print('----- Reconstructed samples -----')
for ns in reconstructed_samples:
  plot(ns)
  play(ns)

Number of samples: 1
Sample object type: <class 'note_seq.protobuf.music_pb2.NoteSequence'>

----- Actual samples -----


Shape of encoded vector: (1, 512)
Number of samples: 1
Sample object type: <class 'note_seq.protobuf.music_pb2.NoteSequence'>

----- Reconstructed samples -----
