# Train DDSP on NSynth guitar subset

## Setup Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# create a symlink without spaces to avoid problems with apache-beam
!ln -s '/content/drive/My Drive' '/gdrive'

## Install Dependencies

First we install the required dependencies with `pip`.

In [67]:
%tensorflow_version 2.x
!pip install -qU ddsp[data_preparation]==1.0.1

In [68]:
# Disabled alternative to the pinned pip install above: clone the DDSP
# repository into /content and install it in editable mode.
# import os

# os.chdir("/content")

# ddsp_dir = "ddsp"
# if not os.path.exists(ddsp_dir):
#   !git clone https://github.com/magenta/$ddsp_dir

# os.chdir(ddsp_dir)

# %tensorflow_version 2.x
# !pip install -e .
# !pip install note_seq

## Make directories to save model and data

In [None]:
import glob
import os

# Project folder on Google Drive, addressed through the space-free /gdrive link.
drive_dir = os.path.normpath('/gdrive/nsynth_guitar')

# Fail early, with an actionable message, when Drive is not mounted or the
# project folder is missing (a plain file at this path is rejected as well).
assert os.path.isdir(drive_dir), (
    f'Drive folder not found: {drive_dir}. Mount Google Drive and create the '
    "'nsynth_guitar' folder in 'My Drive' before continuing.")
print('Drive Folder Exists:', drive_dir)

## Prepare Dataset
### Preprocess raw audio into TFRecord dataset

We need to do some preprocessing on the raw audio you uploaded to get it into the correct format for training. This involves turning the full audio into short (4-second) examples, inferring the fundamental frequency (or "pitch") with [CREPE](http://github.com/marl/crepe), and computing the loudness. These features will then be stored in a sharded [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) file for easier loading. Depending on the amount of input audio, this process usually takes a few minutes.

* (Optional) Transfer dataset from drive. If you've already created a dataset in a previous run, this cell will skip the dataset creation step.

In [70]:
# Disabled: build the TFRecord dataset from raw audio stored on Drive with
# ddsp_prepare_tfrecord. The notebook downloads a preprocessed dataset instead
# (see the next section).
# audio_dir = os.path.join(drive_dir, 'audio')
# dataset_dir = os.path.join(drive_dir, 'dataset')
# !mkdir -p "$dataset_dir"

# audio_filepattern = audio_dir + '/*'
# dataset_filepattern = dataset_dir + '/*'

# train_tfrecord = os.path.join(dataset_dir, 'train.tfrecord')
# train_tfrecord_filepattern = train_tfrecord + "*"

# if not glob.glob(dataset_filepattern):
#   # Make a new dataset.
#   if not glob.glob(audio_filepattern):
#     raise ValueError('No audio files found.')

#   !ddsp_prepare_tfrecord \
#     --input_audio_filepatterns=$audio_filepattern \
#     --output_tfrecord_path=$train_tfrecord \
#     --num_shards=10 \
#     --sliding_window_hop_secs=4 \
#     --alsologtostderr

### Download Preprocessed NSynth Guitar Subset
Adapted from Amit's notebook: https://github.com/TheSoundOfAIOSR/rg_sound_generation/blob/main/members/amit/Download%20Preprocessed.ipynb

In [71]:
import os
import urllib.request

# Local folder (relative to the working directory) holding the three splits.
dataset_dir = 'preprocessed'
train_dataset_dir = os.path.join(dataset_dir, 'data_train')
valid_dataset_dir = os.path.join(dataset_dir, 'data_valid')
test_dataset_dir = os.path.join(dataset_dir, 'data_test')

# Shards of every split share the 'train.tfrecord' file prefix.
train_tfrecord_filepattern = os.path.join(train_dataset_dir, 'train.tfrecord*')
valid_tfrecord_filepattern = os.path.join(valid_dataset_dir, 'train.tfrecord*')
test_tfrecord_filepattern = os.path.join(test_dataset_dir, 'train.tfrecord*')

# (split name, number of shards) for each split hosted in the S3 bucket.
datasets = (('train', 100), ('test', 10), ('valid', 100))

for set_name, count in datasets:
  target_dir = os.path.join(dataset_dir, f'data_{set_name}')
  os.makedirs(target_dir, exist_ok=True)
  print('='*40)
  print(f'Downloading set {set_name}..')
  print('='*40)
  for i in range(count):
    object_name = f'train.tfrecord-{i:05d}-of-{count:05d}'
    download_path = os.path.join(target_dir, object_name)
    # Skip shards that are already complete so an interrupted run can be
    # resumed by simply re-running this cell.
    if os.path.exists(download_path):
      continue
    object_path = f'https://osr-tsoai.s3.amazonaws.com/data_{set_name}/{object_name}'
    print(f'Downloading to {download_path}')
    # Download under a name that does NOT match 'train.tfrecord*', then rename:
    # a partially written file is never picked up by the file patterns above.
    # urlretrieve raises on HTTP errors and on truncated transfers.
    partial_path = os.path.join(target_dir, f'partial.{object_name}')
    urllib.request.urlretrieve(object_path, partial_path)
    os.replace(partial_path, download_path)

Let's load the dataset in the `ddsp` library and have a look at one of the examples.

In [None]:
from ddsp.colab import colab_utils
import ddsp.training
from matplotlib import pyplot as plt
import numpy as np

data_provider = ddsp.training.data.TFRecordProvider(train_tfrecord_filepattern)
dataset = data_provider.get_dataset(shuffle=True)

# Pull a single example; an empty dataset ends iteration immediately.
try:
  ex = next(iter(dataset))
except StopIteration:
  raise ValueError(
      'TFRecord contains no examples. Please try re-running the pipeline with '
      'different audio file(s).')

colab_utils.specplot(ex['audio'])
colab_utils.play(ex['audio'])

# Duration of one example in seconds (the examples are 4-second clips).
example_secs = 4.0
# (feature key in the example, y-axis label) for each stacked panel.
feature_panels = (
    ('loudness_db', 'loudness_db'),
    ('f0_hz', 'F0_Hz'),
    ('f0_confidence', 'F0_confidence'),
)

# Share the x axis so only the bottom panel needs a label; this avoids the
# labels overlapping the panels below them in such a short figure.
f, ax = plt.subplots(len(feature_panels), 1, figsize=(14, 4), sharex=True)
for panel, (key, label) in zip(ax, feature_panels):
  values = np.asarray(ex[key])
  # Derive the time axis from the actual number of frames rather than
  # assuming a fixed frame count.
  x = np.linspace(0, example_secs, len(values))
  panel.set_ylabel(label)
  panel.plot(x, values)
ax[-1].set_xlabel('seconds');

## Train Model
* Models typically perform well when the loss drops to the range of ~4.5-5.0.
* Depending on the dataset this can take anywhere from 5k-30k training steps usually.
* The default is set to 30k, but you can stop training at any time, and for timbre transfer, it's best to stop before the loss drops too far below ~5.0 to avoid overfitting.
* On the colab GPU, this can take from around 3-20 hours. 
* By default, checkpoints will be saved every 300 steps with a maximum of 10 checkpoints (at ~60MB/checkpoint this is ~600MB). Feel free to adjust these numbers depending on the frequency of saves you would like and space on your drive.

In [73]:
# Output folders on Drive for training checkpoints/summaries and evaluation.
drive_train_dir = os.path.join(drive_dir, 'train')
drive_eval_dir = os.path.join(drive_dir, 'eval')
# Pure-Python equivalent of `mkdir -p`: a failure raises here instead of only
# printing a shell error and letting later cells run against a missing folder.
for output_dir in (drive_train_dir, drive_eval_dir):
  os.makedirs(output_dir, exist_ok=True)

First, let's start up a [TensorBoard](https://www.tensorflow.org/tensorboard) to monitor our loss as training proceeds. 

Initially, TensorBoard will report `No dashboards are active for the current data set.`, but once training begins, the dashboards should appear.

In [None]:
%reload_ext tensorboard
import tensorboard as tb
tb.notebook.start('--logdir "{}"'.format(drive_train_dir))

### Train on train set

In [None]:
# Run DDSP training from the command line with the nsynth_ae.gin model config
# and the TFRecord dataset config, reading the downloaded training shards.
# Output goes to drive_train_dir. Settings: batch size 16, 30000 training
# steps, a save every 300 steps, and at most 10 checkpoints kept.
!ddsp_run \
  --mode=train \
  --alsologtostderr \
  --save_dir="$drive_train_dir" \
  --gin_file=papers/iclr2020/nsynth_ae.gin \
  --gin_file=datasets/tfrecord.gin \
  --gin_param="TFRecordProvider.file_pattern='$train_tfrecord_filepattern'" \
  --gin_param="batch_size=16" \
  --gin_param="train_util.train.num_steps=30000" \
  --gin_param="train_util.train.steps_per_save=300" \
  --gin_param="trainers.Trainer.checkpoints_to_keep=10"

### Evaluate on test set

In [76]:
# Disabled: evaluate on the test shards, writing results to drive_eval_dir.
# NOTE(review): eval presumably also needs --restore_dir="$drive_train_dir" so
# it can find the trained checkpoints - confirm against the ddsp_run flags
# before re-enabling.
# !ddsp_run \
#   --mode=eval \
#   --alsologtostderr \
#   --save_dir="$drive_eval_dir" \
#   --gin_file=papers/iclr2020/nsynth_ae.gin \
#   --gin_file=datasets/tfrecord.gin \
#   --gin_param="TFRecordProvider.file_pattern='$test_tfrecord_filepattern'"

## Resynthesis

Check how well the model reconstructs the training data.

In [None]:
from ddsp.colab.colab_utils import play, specplot
import ddsp.training
import gin
from matplotlib import pyplot as plt
import numpy as np

data_provider = ddsp.training.data.TFRecordProvider(train_tfrecord_filepattern)
dataset = data_provider.get_batch(batch_size=1, shuffle=True)

# An exhausted (empty) dataset iterator raises StopIteration. The previous
# `except OutOfRangeError` referenced an undefined name, so an empty dataset
# surfaced as a NameError instead of the intended message.
try:
  batch = next(iter(dataset))
except StopIteration:
  raise ValueError(
      'TFRecord contains no examples. Please try re-running the pipeline with '
      'different audio file(s).')

# Parse the gin config saved by the training run. Unlocking the config lets
# this cell be re-run in the same kernel; skip_unknown ignores entries for
# configurables that are not registered in this session.
gin_file = os.path.join(drive_train_dir, 'operative_config-0.gin')
with gin.unlock_config():
  gin.parse_config_file(gin_file, skip_unknown=True)

# Load model and restore its weights from the training directory.
model = ddsp.training.models.Autoencoder()
model.restore(drive_train_dir)

# Resynthesize audio.
outputs = model(batch, training=False)
audio_gen = model.get_audio_from_outputs(outputs)
audio = batch['audio']

print('Original Audio')
specplot(audio)
play(audio)

print('Resynthesis')
specplot(audio_gen)
play(audio_gen)