<a href="https://colab.research.google.com/github/HyunLee103/Music_VAE_drum_sampling/blob/main/Music_VAE_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Environment setting

In [None]:
import glob

# Google Cloud Storage path under Magenta's music_vae/colab2 folder.
# NOTE(review): no cell visible in this notebook reads BASE_DIR -- confirm
# whether it is still needed before removing it.
BASE_DIR = "gs://download.magenta.tensorflow.org/models/music_vae/colab2"

print('Installing dependencies...')
# System packages: the FluidSynth shared library and a General MIDI soundfont,
# plus the build toolchain and ALSA/JACK development headers.
# `-qq` keeps the apt output quiet.
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
# Python bindings for FluidSynth.
!pip install -q pyfluidsynth
# Install Magenta, upgrading it if already present (-U).
# NOTE(review): no version is pinned, so reruns may pick up a different release.
!pip install -qU magenta

# Hack to allow python to pick up the newly-installed fluidsynth lib.
# This is only needed for the hosted Colab environment.
import ctypes.util

# If this cell already ran, `ctypes.util.find_library` is our own proxy.
# Unwrap it so we always hold the genuine finder: capturing the proxy itself
# would make the fallback below call itself forever (RecursionError) for every
# library other than 'fluidsynth' once the cell is re-run.
_active_find_library = ctypes.util.find_library
orig_ctypes_util_find_library = getattr(
    _active_find_library, 'wrapped_find_library', _active_find_library)


def proxy_find_library(lib):
  """Resolves 'fluidsynth' to the installed .so; defers everything else.

  Args:
    lib: Library name as passed to `ctypes.util.find_library`.

  Returns:
    'libfluidsynth.so.1' for 'fluidsynth', otherwise whatever the original
    `ctypes.util.find_library` returns for `lib`.
  """
  if lib == 'fluidsynth':
    return 'libfluidsynth.so.1'
  return orig_ctypes_util_find_library(lib)


# Remember the genuine finder on the proxy so a later re-run can recover it.
proxy_find_library.wrapped_find_library = orig_ctypes_util_find_library
ctypes.util.find_library = proxy_find_library


print('Importing libraries and defining some helper functions...')
from google.colab import files
import magenta.music as mm
from magenta.models.music_vae import configs
from magenta.models.music_vae.trained_model import TrainedModel
import numpy as np
import os
import tensorflow.compat.v1 as tf

# Use TensorFlow through its v1 compatibility API, then switch eager execution
# back on: a later cell iterates over a tf.data.TFRecordDataset directly, and
# its comment notes that this only works in eager mode.
# NOTE(review): enable_eager_execution() is meant to be called at program
# startup -- confirm this ordering still works after a TensorFlow upgrade.
tf.disable_v2_behavior()
tf.enable_eager_execution()

# Necessary until pyfluidsynth is updated (>1.2.5).
# Silences every DeprecationWarning for the rest of the session.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

def play(note_sequence):
  """Plays `note_sequence` through `mm.play_sequence` using FluidSynth.

  Args:
    note_sequence: The sequence to render; passed straight to
      `mm.play_sequence` with `synth=mm.fluidsynth`.
  """
  mm.play_sequence(note_sequence, synth=mm.fluidsynth)

def interpolate(model, start_seq, end_seq, num_steps, max_length=32,
                assert_same_length=True, temperature=0.5,
                individual_duration=4.0):
  """Decodes, auditions and plots an interpolation between two sequences.

  Args:
    model: Object exposing `interpolate(start, end, num_steps=..., length=...,
      temperature=..., assert_same_length=...)`.
    start_seq: Sequence the interpolation starts from.
    end_seq: Sequence the interpolation ends at.
    num_steps: Number of interpolation steps requested from the model.
    max_length: Forwarded to the model as `length`.
    assert_same_length: Forwarded to the model unchanged.
    temperature: Forwarded to the model unchanged.
    individual_duration: Duration given to each decoded sequence when they
      are concatenated.

  Returns:
    The concatenated interpolation when `num_steps > 3`, otherwise the decoded
    sequence at index `num_steps // 2`.
  """
  decoded = model.interpolate(
      start_seq, end_seq,
      num_steps=num_steps,
      length=max_length,
      temperature=temperature,
      assert_same_length=assert_same_length)
  midpoint = num_steps // 2

  # Audition both endpoints and the midpoint before the full interpolation.
  for caption, position in (('Start Seq Reconstruction', 0),
                            ('End Seq Reconstruction', -1),
                            ('Mean Sequence', midpoint)):
    print(caption)
    play(decoded[position])

  print('Start -> End Interpolation')
  durations = [individual_duration for _ in decoded]
  interp_seq = mm.sequences_lib.concatenate_sequences(decoded, durations)
  play(interp_seq)
  mm.plot_sequence(interp_seq)

  if num_steps > 3:
    return interp_seq
  return decoded[midpoint]

def download(note_sequence, filename):
  """Writes `note_sequence` to a MIDI file and triggers a Colab download.

  Args:
    note_sequence: Sequence handed to `mm.sequence_proto_to_midi_file`.
    filename: Path the MIDI file is written to, then passed to
      `files.download`.
  """
  mm.sequence_proto_to_midi_file(note_sequence, filename)
  files.download(filename)

print('Done')

# Preprocess MIDI to tfrecord

In [None]:
cd /content/drive/MyDrive/Colab Notebooks/magenta/magenta/scripts

/content/drive/MyDrive/Colab Notebooks/magenta/magenta/scripts


In [None]:
import hashlib
import os

from note_seq import abc_parser
from note_seq import midi_io
from note_seq import musicxml_reader
import tensorflow.compat.v1 as tf

### 1. midi를 proto 형식의 sequence로 변환

In [None]:
# Example MIDI file converted below (absolute path on the mounted Drive).
EXAMPLE_MIDI_PATH = '/content/drive/MyDrive/Colab Notebooks/magenta/magenta/scripts/INPUT_DIRECTORY/drummer1/session2/100_funk-rock_92_fill_4-4.mid'

# Read the raw MIDI bytes inside a `with` block so the file handle is closed
# deterministically, then parse them into a sequence proto.
with tf.gfile.GFile(EXAMPLE_MIDI_PATH, 'rb') as midi_file:
    sequence = midi_io.midi_to_sequence_proto(midi_file.read())

- sequence는 play 함수를 통해 바로 들을 수 있다.

In [None]:
play(sequence)

In [None]:
sequence

ticks_per_quarter: 480
time_signatures {
  numerator: 4
  denominator: 4
}
key_signatures {
}
tempos {
  qpm: 91.99998773333496
}
notes {
  pitch: 36
  velocity: 58
  start_time: 0.008152175000000001
  end_time: 0.10869566666666666
  is_drum: true
}
notes {
  pitch: 26
  velocity: 127
  start_time: 0.03532609166666667
  end_time: 0.13586958333333335
  is_drum: true
}
notes {
  pitch: 36
  velocity: 60
  start_time: 0.334239175
  end_time: 0.43478266666666665
  is_drum: true
}
notes {
  pitch: 26
  velocity: 127
  start_time: 0.34782613333333334
  end_time: 0.44972832083333336
  is_drum: true
}
notes {
  pitch: 40
  velocity: 127
  start_time: 0.652174
  end_time: 0.7527174916666667
  is_drum: true
}
notes {
  pitch: 26
  velocity: 127
  start_time: 0.6671196541666666
  end_time: 0.7676631458333334
  is_drum: true
}
notes {
  pitch: 38
  velocity: 11
  start_time: 0.9741849125
  end_time: 1.0747284041666667
  is_drum: true
}
notes {
  pitch: 36
  velocity: 57
  start_time: 0.97961969583

### 2. sequence를 tfrecord 형식으로 변환
- tfrecord는 tensorflow에서 사용가능한 바이너리 형식 format
- 저장 용량을 대폭 줄일 수 있고, TFRecordDataset 모듈을 통해 데이터셋을 구축할 수 있다

In [None]:
# Serialize the `sequence` proto and write it as a single record to the file
# 'test_tfrecord' (path is relative to the current working directory).
with tf.io.TFRecordWriter('test_tfrecord') as writer:
    writer.write(sequence.SerializeToString())

### 3.  다음 script는 위 2가지 전처리를 반복, INPUT_DIRECTORY 내 모든 midi 파일을 SEQUENCES_TFRECORD로 변환.

In [None]:
# Convert the MIDI files under INPUT_DIRECTORY (including subdirectories, via
# --recursive) and store the result in the TFRecord file SEQUENCES_TFRECORD.
# Both paths are relative to the current working directory.
!python convert_dir_to_note_sequences.py \
  --input_dir=INPUT_DIRECTORY \
  --output_file=SEQUENCES_TFRECORD \
  --recursive

- 잘 저장 되었는지 읽어서 확인

In [None]:
# Open the SEQUENCES_TFRECORD file as a dataset of raw (still serialized) records.
raw_dataset = tf.data.TFRecordDataset('SEQUENCES_TFRECORD')

In [None]:
# Running through the TF1 compat API, so iterating the dataset like this only
# works while tf.enable_eager_execution() is active.
# Print the repr of the first 10 raw records as a sanity check.
for raw_record in raw_dataset.take(10):
    print(repr(raw_record))

<tf.Tensor: shape=(), dtype=string, numpy=b'\nA/id/midi/INPUT_DIRECTORY/7ed86b524ba6b91d7ca5b77ff03448ac59088113\x12*drummer1/session3/18_rock_128_beat_4-4.mid\x1a\x0fINPUT_DIRECTORY \xe0\x03*\x04\x10\x04\x18\x042\x00:\t\x11\x00\x00\x00\x00\x00\x00`@B\x18\x08,\x10-\x19\x00\x00\x00\x00\x00\xe8\xeb?!\x00\x00\x00\x00\x000\xef?H\x01B\x18\x08(\x10\x7f\x19\x00\x00\x00\x00\x00\xdc\xf5?!\x00\x00\x00\x00\x00X\xf6?H\x01B\x18\x08(\x10~\x19\x00\x00\x00\x00\x00X\xf6?!\x00\x00\x00\x00\x00x\xf7?H\x01B\x18\x08$\x10\x7f\x19\x00\x00\x00\x00\x00T\xfd?!\x00\x00\x00\x00\x00\xf4\xfe?H\x01B\x18\x08$\x10\x7f\x19\x00\x00\x00\x00\x00\x98\x00@!\x00\x00\x00\x00\x00f\x01@H\x01B\x18\x08(\x10\x7f\x19\x00\x00\x00\x00\x00r\x02@!\x00\x00\x00\x00\x00@\x03@H\x01B\x18\x08&\x10\x07\x19\x00\x00\x00\x00\x00\xb2\x03@!\x00\x00\x00\x00\x00\x82\x04@H\x01B\x18\x08$\x10E\x19\x00\x00\x00\x00\x00>\x04@!\x00\x00\x00\x00\x00\x0c\x05@H\x01B\x18\x08$\x10\x7f\x19\x00\x00\x00\x00\x00P\x06@!\x00\x00\x00\x00\x00 \x07@H\x01B\x18\x08$\x10p\x1

# Training
- 논문에 나온 대로 50k ~ 100k step 범위 안에서, 약 54k step(체크포인트 `model.ckpt-54120`)만큼 학습
- colab pro 환경에서 약 2시간 반 소요
- default path가 tmp/ 로 되어 있어 runtime이 초기화되면 ckpt가 사라진다 -> ckpt 보존을 위해 `--run_dir`을 구글 드라이브 경로로 조정
- hyper-parameter는 default 값 사용

In [None]:
cd /content/drive/MyDrive/Colab Notebooks/magenta/magenta/models/music_vae

/content/drive/MyDrive/Colab Notebooks/magenta/magenta/models/music_vae


In [None]:
!python music_vae_train.py \
--config=groovae_4bar \
--run_dir=./ckpt \
--mode=train \
--tfds_name=groove/4bar-midionly \
--chekpoint_to_keep 50

# Sampling 
- 학습된 모델을 load 하고, 랜덤 가우시안 벡터를 디코더에 통과시켜 sample 생성
- 4마디 샘플을 생성하므로, length = 64 스텝(마디당 16 스텝 x 4마디)
- 생성된 샘플은 generated_sample/ 에 저장

In [None]:
# Look up the GrooVAE 4-bar config and load the trained checkpoint into a
# registry keyed by config name.
drums_config = configs.CONFIG_MAP['groovae_4bar']
drums_models = {
    'groovae_4bar': TrainedModel(
        drums_config,
        batch_size=4,
        checkpoint_dir_or_path='./ckpt/train/model.ckpt-54120'),
}

In [34]:
# Key of the loaded model to sample from.
drums_sample_model = "groovae_4bar"
# Sampling temperature passed to the model's sample() call.
temperature = 0.5
# Draw 4 samples of length 64 from the selected model.
# NOTE(review): no random seed is set in this cell, so reruns presumably yield
# different samples -- confirm whether reproducibility is required.
drums_samples = drums_models[drums_sample_model].sample(n=4, length=64, temperature=temperature)
# Play each generated sample in turn.
for ns in drums_samples:
  play(ns)

In [None]:
# Display the data of the first generated sample sequence.
drums_samples[0]

tempos {
  qpm: 120.0
}
notes {
  pitch: 50
  velocity: 85
  start_time: 0.12441039393888786
  end_time: 0.24941039393888786
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 54
  start_time: 0.24161612801253796
  end_time: 0.36661612801253796
  instrument: 9
  is_drum: true
}
notes {
  pitch: 36
  velocity: 43
  start_time: 0.49567309068515897
  end_time: 0.620673090685159
  instrument: 9
  is_drum: true
}
notes {
  pitch: 38
  velocity: 88
  start_time: 0.4948882684111595
  end_time: 0.6198882684111595
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 23
  start_time: 0.48846295196563005
  end_time: 0.61346295196563
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 36
  start_time: 0.7327359020709991
  end_time: 0.8577359020709991
  instrument: 9
  is_drum: true
}
notes {
  pitch: 36
  velocity: 104
  start_time: 0.9963298765942454
  end_time: 1.1213298765942454
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 48
  star