In [5]:
# Report the version of the interpreter this kernel is actually running.
# `!python` is resolved through the shell's PATH, which can point at a
# different installation than the one executing the notebook.
import platform
print("Python", platform.python_version())

Python 3.8.5


In [2]:
!pip install --upgrade ddsp

Looking in indexes: https://pypi.org/simple, https://packagecloud.io/github/git-lfs/pypi/simple
Requirement already up-to-date: ddsp in /Users/samuelnarvaez/opt/anaconda3/lib/python3.8/site-packages (1.3.0)






In [8]:
import warnings
warnings.filterwarnings("ignore")

import time

import ddsp
from ddsp.training import (data, decoders, encoders, models, preprocessing, 
                           train_util, trainers)
import gin
import matplotlib.pyplot as plt
import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds

# --- Data configuration ------------------------------------------------------
# Single source of truth for the audio settings: the data provider, the
# preprocessor and the harmonic synth all read these names instead of
# repeating the literals.
sample_rate = 16000  # Passed to both the TFRecord provider and the synth.
EXAMPLE_SECS = 4     # `example_secs` handed to the TFRecord provider.
FRAME_RATE = 250     # `frame_rate` handed to the TFRecord provider.
# Same value as the previous literal 1000, now derived from the settings above.
TIME_STEPS = EXAMPLE_SECS * FRAME_RATE

strategy = train_util.get_strategy()

data_provider = data.TFRecordProvider(
    file_pattern='data/train.tfrecord*',
    example_secs=EXAMPLE_SECS,
    sample_rate=sample_rate,
    frame_rate=FRAME_RATE,
)

# Take a single unshuffled batch and repeat it indefinitely, so every training
# step is fed the same example.
dataset = data_provider.get_batch(batch_size=1, shuffle=False).take(1).repeat()
batch = next(iter(dataset))
audio = batch['audio']
# Size of the second axis of the audio tensor; reused as the synth's n_samples.
n_samples = audio.shape[1]

# Neural networks: feature preprocessor, latent encoder and decoder.
preprocessor = preprocessing.F0LoudnessPreprocessor(time_steps=TIME_STEPS)

encoder = encoders.MfccTimeDistributedRnnEncoder(
    rnn_channels=512,
    rnn_type='gru',
    z_dims=16,
    z_time_steps=125,
)

# Name and width of each slice the decoder output is split into; the names
# are the ones the processor graph consumes as inputs.
decoder_output_splits = (
    ('amps', 1),
    ('harmonic_distribution', 100),
    ('noise_magnitudes', 65),
)

decoder = decoders.RnnFcDecoder(
    rnn_channels=512,
    rnn_type='gru',
    ch=512,
    layers_per_stack=3,
    input_keys=('ld_scaled', 'f0_scaled', 'z'),
    output_splits=decoder_output_splits,
)

# Signal processors: a harmonic synth, a filtered-noise synth and an adder.
harmonic = ddsp.synths.Harmonic(
    n_samples=n_samples, sample_rate=sample_rate, name='harmonic')
noise = ddsp.synths.FilteredNoise(
    window_size=0, initial_bias=-10.0, name='noise')
add = ddsp.processors.Add(name='add')

# Processor graph: each entry pairs a processor with the names of its inputs.
# The adder reads the two synths' outputs through their '<name>/signal' keys.
dag = [
    (harmonic, ['amps', 'harmonic_distribution', 'f0_hz']),
    (noise, ['noise_magnitudes']),
    (add, ['noise/signal', 'harmonic/signal']),
]

processor_group = ddsp.processors.ProcessorGroup(
    dag=dag, name='processor_group')


# Loss: spectral loss of type 'L1' with equal magnitude and log-magnitude weights.
spectral_loss = ddsp.losses.SpectralLoss(
    loss_type='L1', mag_weight=1.0, logmag_weight=1.0)

# Assemble the autoencoder and its trainer inside the strategy scope.
with strategy.scope():
  model = models.Autoencoder(
      preprocessor=preprocessor,
      encoder=encoder,
      decoder=decoder,
      processor_group=processor_group,
      losses=[spectral_loss],
  )
  trainer = trainers.Trainer(model, strategy, learning_rate=1e-3)





INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [9]:
# Build the model by running it once on a sample batch.
# NOTE: this rebinds `dataset` to its distributed version, so re-running the
# cell feeds the already-distributed dataset back into distribute_dataset;
# re-run the setup cell first if this cell needs to be executed again.
dataset = trainer.distribute_dataset(dataset)
first_batch = next(iter(dataset))
trainer.build(first_batch)

Model: "autoencoder_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
f0_loudness_preprocessor_1 ( multiple                  0         
_________________________________________________________________
mfcc_time_distributed_rnn_en multiple                  843852    
_________________________________________________________________
rnn_fc_decoder_1 (RnnFcDecod multiple                  6407334   
_________________________________________________________________
processor_group (ProcessorGr multiple                  0         
_________________________________________________________________
spectral_loss (SpectralLoss) multiple                  0         
Total params: 7,251,186
Trainable params: 7,251,186
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Run 300 training steps, printing one tab-separated line per step that lists
# the step index followed by every loss the trainer reports.
dataset_iter = iter(dataset)

for i in range(300):
  losses = trainer.train_step(dataset_iter)
  loss_fields = ['{}: {:.2f}\t'.format(k, v) for k, v in losses.items()]
  res_str = 'step: {}\t'.format(i) + ''.join(loss_fields)
  print(res_str)

step: 0	spectral_loss: 17.58	total_loss: 17.58	
step: 1	spectral_loss: 31.94	total_loss: 31.94	
step: 2	spectral_loss: 31.03	total_loss: 31.03	
step: 3	spectral_loss: 29.89	total_loss: 29.89	
step: 4	spectral_loss: 28.77	total_loss: 28.77	
step: 5	spectral_loss: 27.08	total_loss: 27.08	
step: 6	spectral_loss: 23.50	total_loss: 23.50	
step: 7	spectral_loss: 16.46	total_loss: 16.46	
step: 8	spectral_loss: 17.60	total_loss: 17.60	
step: 9	spectral_loss: 21.80	total_loss: 21.80	
step: 10	spectral_loss: 21.77	total_loss: 21.77	
step: 11	spectral_loss: 19.12	total_loss: 19.12	
step: 12	spectral_loss: 16.14	total_loss: 16.14	
step: 13	spectral_loss: 14.76	total_loss: 14.76	
step: 14	spectral_loss: 15.24	total_loss: 15.24	
step: 15	spectral_loss: 15.69	total_loss: 15.69	
step: 16	spectral_loss: 15.45	total_loss: 15.45	
step: 17	spectral_loss: 14.70	total_loss: 14.70	
step: 18	spectral_loss: 13.93	total_loss: 13.93	
step: 19	spectral_loss: 13.84	total_loss: 13.84	
step: 20	spectral_loss: 14.20	

KeyboardInterrupt: 

In [7]:
!pip freeze > requirements.txt