In [None]:
from pathlib import Path
import os

# Root of the local project tree; every other path in this cell is derived from it.
PROJECT_DIR = Path('/home/luca/Development')

DATA_DIR = PROJECT_DIR.joinpath('data_l2_differential')
# parents=True mirrors the TFRECORDS_DIR.mkdir call below, so a missing
# PROJECT_DIR no longer makes this line fail with FileNotFoundError.
DATA_DIR.mkdir(parents=True, exist_ok=True)

DATASET_DIR = PROJECT_DIR.joinpath('dataset')

# Dataset sub-path, reused below DATASET_DIR and below the TFRecords tree.
DATASET = 'chitarra_michele_rossi/separate/9_les_neck_pick'

AUDIO_DIR = DATASET_DIR.joinpath(DATASET)

TFRECORDS_DIR = PROJECT_DIR.joinpath('TFRecords').joinpath(DATASET).joinpath('prova')
TFRECORDS_DIR.mkdir(parents=True, exist_ok=True)

FILENAME_PREFIX = 'train.tfrecord'

TRAIN_TFRECORD_FILEPATTERN = TFRECORDS_DIR.joinpath(FILENAME_PREFIX + '*')

# Export the conda environment's lib directory through LD_LIBRARY_PATH and
# XLA_FLAGS. Outside a conda environment CONDA_PREFIX is undefined, so the
# variables are left untouched instead of aborting the cell with a KeyError.
conda_prefix = os.environ.get('CONDA_PREFIX')
if conda_prefix:
    conda_lib = conda_prefix + '/lib'
    os.environ['LD_LIBRARY_PATH'] = conda_lib
    os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=' + conda_lib
else:
    print('CONDA_PREFIX is not set; leaving LD_LIBRARY_PATH and XLA_FLAGS unchanged.')



In [None]:
import tensorflow as tf

# Enable memory growth on every GPU TensorFlow can see. This stays best-effort:
# a failure on one device is reported and the loop continues.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
  try:
    tf.config.experimental.set_memory_growth(gpu, True)
  except (ValueError, RuntimeError) as err:
    # Invalid device or cannot modify virtual devices once initialized.
    # Only these two documented errors are caught, so unrelated failures
    # (and KeyboardInterrupt) are no longer silently swallowed.
    print('Could not enable memory growth for {}: {}'.format(gpu, err))

In [None]:
from pathlib import Path
from ddsp.training.data_preparation.prepare_tfrecord_lib import prepare_tfrecord

AUDIO_FILEPATTERN = AUDIO_DIR.joinpath('*')
TRAIN_TFRECORD_INPUT = str(TFRECORDS_DIR.joinpath('input').joinpath(FILENAME_PREFIX))
TRAIN_TFRECORD_OUTPUT = str(TFRECORDS_DIR.joinpath('output').joinpath(FILENAME_PREFIX))
dataset_files = list(TFRECORDS_DIR.glob('*'))
if len(dataset_files) == 0:
    if not AUDIO_DIR.glob('*'):
        raise ValueError('No audio files found in {}'.format(AUDIO_DIR))
    
    !ddsp_prepare_tfrecord \
        --input_audio_filepatterns="$AUDIO_FILEPATTERN" \
        --output_tfrecord_path="$TRAIN_TFRECORD_INPUT" \
        --num_shards=10 \
        --example_secs=4 \
        --hop_secs=1 \
        --alsologtostderr

    !ddsp_prepare_tfrecord \
        --input_audio_filepatterns="$AUDIO_FILEPATTERN" \
        --output_tfrecord_path="$TRAIN_TFRECORD_OUTPUT" \
        --num_shards=10 \
        --example_secs=4 \
        --hop_secs=1 \
        --shifted \
        --alsologtostderr 

In [None]:
import ddsp.training
from ddsp.local import local_utils
from matplotlib import pyplot as plt
import numpy as np

batch_size = 3
sequence_length = 2
audio_rate = 32000
data_rate = 1000
# Length of one example in seconds; keep in sync with the --example_secs flag
# passed to ddsp_prepare_tfrecord.
example_secs = 4.0

# Pair the 'input' and 'output' TFRecords so each batch yields both examples.
input_data_provider = ddsp.training.data.TFRecordProvider(str(TRAIN_TFRECORD_INPUT) + '*')
output_data_provider = ddsp.training.data.TFRecordProvider(str(TRAIN_TFRECORD_OUTPUT) + '*')
data_providers = [input_data_provider, output_data_provider]
zipped_data_provider = ddsp.training.data.ZippedProvider(data_providers)
dataset = zipped_data_provider.get_batch(batch_size=batch_size, shuffle=True)

dataset_iter = iter(dataset)

# Pull a single batch and unpack the features of both sides.
ex_in, ex_out = next(dataset_iter)
audio_in = ex_in['audio']
audio_out = ex_out['audio']
loudness_in = ex_in['loudness_db']
loudness_out = ex_out['loudness_db']
f0_in = ex_in['f0_hz']
f0_out = ex_out['f0_hz']
f0_confidance_in = ex_in['f0_confidence']
f0_confidance_out = ex_out['f0_confidence']

local_utils.specplot(audio_in)
local_utils.play(audio_in)
local_utils.specplot(audio_out)
local_utils.play(audio_out)

# One panel per feature, in the same order as before.
panels = [
  ('loudness_in_db', loudness_in),
  ('loudness_out_db', loudness_out),
  ('F0_in_Hz', f0_in),
  ('F0_out_Hz', f0_out),
  ('F0_in_confidence', f0_confidance_in),
  ('F0_out_confidence', f0_confidance_out),
]
f, ax = plt.subplots(len(panels), 1, figsize=(14, 4))
for axis, (label, feature) in zip(ax, panels):
  # Assumes each feature is shaped (batch, time) -- TODO confirm. Transposing
  # puts time first, so matplotlib draws one line per example in the batch.
  curves = np.asarray(feature).T
  # The time axis must have one point per frame. The previous
  # np.linspace(0, loudness_in.shape[0]) produced 50 points ending at the
  # size of the first dimension, which did not match the data passed to plot().
  x = np.linspace(0, example_secs, curves.shape[0])
  axis.set_ylabel(label)
  axis.set_xlabel('seconds')
  axis.plot(x, curves)


# audio = np.empty((batch_size,0))
# loudness = np.empty((batch_size,0))
# f0 = np.empty((batch_size,0))
# f0_confidance = np.empty((batch_size,0))
# for n in range(sequence_length):
#   try:
#     ex = next(dataset_iter)
#     audio = np.concatenate((audio, ex['audio']), axis=-1)
#     loudness = np.concatenate((loudness, ex['loudness_db']), axis=-1)
#     f0 = np.concatenate((f0, ex['f0_hz']), axis=-1)
#     f0_confidance = np.concatenate((f0_confidance, ex['f0_confidence']), axis=-1)
#   except StopIteration:
#     raise ValueError(
#         'TFRecord contains no examples. Please try re-running the pipeline with '
#         'different audio file(s).')
      

# for n in range(batch_size):
#   local_utils.specplot(audio[n])
#   local_utils.play(audio[n])

#   f, ax = plt.subplots(3, 1, figsize=(14, 4))
#   x = np.linspace(0, 4.0*sequence_length, 1000*sequence_length)
#   ax[0].set_ylabel('loudness_db')
#   ax[0].plot(x, loudness[n])
#   ax[1].set_ylabel('F0_Hz')
#   ax[1].set_xlabel('seconds')
#   ax[1].plot(x, f0[n])
#   ax[2].set_ylabel('F0_confidence')
#   ax[2].set_xlabel('seconds')
#   ax[2].plot(x, f0_confidance[n])

In [None]:
from ddsp.local import local_utils
from matplotlib import pyplot as plt
import numpy as np

batch_size = 3
sequence_length = 2
audio_rate = 32000
data_rate = 1000
# Length of one example in seconds; keep in sync with the --example_secs flag
# passed to ddsp_prepare_tfrecord.
example_secs = 4.0

data_provider = ddsp.training.data.TFRecordProvider(str(TRAIN_TFRECORD_INPUT) + '*')
dataset = data_provider.get_batch(batch_size=batch_size, shuffle=False)

dataset_iter = iter(dataset)

# Collect `sequence_length` consecutive batches per feature and join them once
# along the last axis. Each list starts with an empty (batch_size, 0) array so
# the result keeps the dtype and batch-size check of the incremental version.
feature_keys = ('audio', 'loudness_db', 'f0_hz', 'f0_confidence')
chunks = {key: [np.empty((batch_size, 0))] for key in feature_keys}
for n in range(sequence_length):
  try:
    ex = next(dataset_iter)
  except StopIteration:
    if n == 0:
      raise ValueError(
          'TFRecord contains no examples. Please try re-running the pipeline with '
          'different audio file(s).')
    # Some batches were read, so "no examples" would be misleading here.
    raise ValueError(
        'TFRecord ran out of examples after {} of {} batches.'.format(
            n, sequence_length))
  for key in feature_keys:
    chunks[key].append(ex[key])

audio = np.concatenate(chunks['audio'], axis=-1)
loudness = np.concatenate(chunks['loudness_db'], axis=-1)
f0 = np.concatenate(chunks['f0_hz'], axis=-1)
f0_confidance = np.concatenate(chunks['f0_confidence'], axis=-1)

# One spectrogram, audio player and three-panel figure per example in the batch.
for n in range(batch_size):
  local_utils.specplot(audio[n])
  local_utils.play(audio[n])

  f, ax = plt.subplots(3, 1, figsize=(14, 4))
  # Size the time axis from the data instead of a hard-coded 1000 frames per
  # example, so the plot still works if the records hold a different count.
  x = np.linspace(0, example_secs*sequence_length, loudness.shape[-1])
  ax[0].set_ylabel('loudness_db')
  ax[0].plot(x, loudness[n])
  ax[1].set_ylabel('F0_Hz')
  ax[1].set_xlabel('seconds')
  ax[1].plot(x, f0[n])
  ax[2].set_ylabel('F0_confidence')
  ax[2].set_xlabel('seconds')
  ax[2].plot(x, f0_confidance[n])