In [1]:
# Fetch the WaveNet repository into the Colab workspace.
# NOTE(review): `git clone` fails if /content/WaveNet already exists (e.g. when this cell is re-run).
%cd /content
!git clone https://github.com/golbin/WaveNet.git
# Switch into the checkout; later cells import the `wavenet` package, presumably resolved from this directory.
%cd /content/WaveNet
# Bring the checkout up to date.
!git pull

/content
Cloning into 'WaveNet'...
remote: Enumerating objects: 64, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 64 (delta 15), reused 14 (delta 14), pack-reused 36[K
Receiving objects: 100% (64/64), 4.28 MiB | 15.94 MiB/s, done.
Resolving deltas: 100% (24/24), done.
/content/WaveNet
Already up to date.


In [2]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Model

We replaced `loss.data[0]` with `loss.item()` in the `train` method, because indexing a 0-dimensional loss tensor raises an error in recent PyTorch versions.

In [3]:
import os

import torch
import torch.optim

from wavenet.networks import WaveNet as WaveNetModule


class WaveNet:
    """
    Thin training/inference wrapper around ``WaveNetModule``.

    Bundles the network with its loss function, its Adam optimizer and
    helpers for saving/loading checkpoints.
    """

    def __init__(self, layer_size, stack_size, in_channels, res_channels, lr=0.002):
        """
        Build the network, loss and optimizer, then move the network to GPU if possible
        :param layer_size: forwarded to WaveNetModule
        :param stack_size: forwarded to WaveNetModule
        :param in_channels: forwarded to WaveNetModule; also the class count used by the loss in train()
        :param res_channels: forwarded to WaveNetModule
        :param lr: learning rate of the Adam optimizer
        """
        self.net = WaveNetModule(layer_size, stack_size, in_channels, res_channels)

        self.in_channels = in_channels
        self.receptive_fields = self.net.receptive_fields

        self.lr = lr
        self.loss = self._loss()
        self.optimizer = self._optimizer()

        self._prepare_for_gpu()

    @staticmethod
    def _loss():
        """
        Create the cross-entropy loss, placed on GPU when CUDA is available
        :return: torch.nn.CrossEntropyLoss
        """
        loss = torch.nn.CrossEntropyLoss()

        if torch.cuda.is_available():
            loss = loss.cuda()

        return loss

    def _optimizer(self):
        """
        Create the Adam optimizer over the network parameters
        :return: torch.optim.Adam
        """
        return torch.optim.Adam(self.net.parameters(), lr=self.lr)

    def _prepare_for_gpu(self):
        """
        Wrap the network in DataParallel when several GPUs exist and move it to CUDA
        """
        if torch.cuda.device_count() > 1:
            print("{0} GPUs are detected.".format(torch.cuda.device_count()))
            self.net = torch.nn.DataParallel(self.net)

        if torch.cuda.is_available():
            self.net.cuda()

    def train(self, inputs, targets):
        """
        Train 1 time
        :param inputs: Tensor[batch, timestep, channels]
        :param targets: Torch tensor [batch, timestep, channels]
        :return: float loss
        """
        outputs = self.net(inputs)

        # Flatten to (N, in_channels) logits against (N,) integer class targets.
        loss = self.loss(outputs.view(-1, self.in_channels),
                         targets.long().view(-1))

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # loss.item() replaces the former loss.data[0].
        return loss.item()

    def generate(self, inputs):
        """
        Generate 1 time
        :param inputs: Tensor[batch, timestep, channels]
        :return: Tensor[batch, timestep, channels]
        """
        # Inference only: without no_grad every call keeps its autograd graph
        # alive for as long as the caller holds the output, so memory grows
        # with each generation pass.
        with torch.no_grad():
            outputs = self.net(inputs)

        return outputs

    @staticmethod
    def get_model_path(model_dir, step=0):
        """
        Build the checkpoint file path
        :param model_dir: directory holding the checkpoints
        :param step: training step; a falsy value selects the un-suffixed file
        :return: 'wavenet_<step>.pkl' or 'wavenet.pkl' joined onto model_dir
        """
        basename = 'wavenet'

        if step:
            return os.path.join(model_dir, '{0}_{1}.pkl'.format(basename, step))
        else:
            return os.path.join(model_dir, '{0}.pkl'.format(basename))

    def load(self, model_dir, step=0):
        """
        Load pre-trained model
        :param model_dir: directory holding the checkpoints
        :param step: training step of the checkpoint (0 = un-suffixed file)
        :return: None
        """
        print("Loading model from {0}".format(model_dir))

        model_path = self.get_model_path(model_dir, step)

        # Deserialize onto CPU so a checkpoint written on a GPU runtime also
        # loads on a CPU-only one; load_state_dict then copies the values into
        # the existing parameters on whatever device they live on.
        state_dict = torch.load(model_path, map_location='cpu')

        self.net.load_state_dict(state_dict)

    def save(self, model_dir, step=0):
        """
        Save the network weights
        :param model_dir: directory to write the checkpoint into (created if missing)
        :param step: training step used in the file name (0 = un-suffixed file)
        :return: None
        """
        print("Saving model into {0}".format(model_dir))

        # torch.save does not create missing directories.
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)

        model_path = self.get_model_path(model_dir, step)

        torch.save(self.net.state_dict(), model_path)

# Trainer

We set the hyperparameters and define the `Trainer`. The `prepare_output_dir` helper creates the log, model and test output directories when it is called.

In [None]:
# These are the parameter values used in the git repo; at the moment they use up all the available RAM.
#layer_size=5
#stack_size=2
#in_channels=2
#res_channels=512
#lr=0.0002
#data_dir='/content/WaveNet/test/data/'
#receptive_fields=62
#sample_size=2000
#sample_rate=8000
#num_steps=100000
#model_dir='/content/WaveNet/model/'
#output_dir='/content/WaveNet/output/'

In [4]:
# Reduced settings: per the original note, lowering res_channels, sample_size,
# sample_rate and num_steps lets the process run to completion, but the
# generated output is bad.

# --- network ---
layer_size = 5
stack_size = 2
in_channels = 2
res_channels = 2

# --- optimisation ---
lr = 0.0002
num_steps = 10000

# --- data ---
data_dir = '/content/WaveNet/test/data/'
receptive_fields = 62  # alternative that was tried: receptive_fields = 5
sample_size = 2000
sample_rate = 8000

# --- output locations ---
model_dir = '/content/WaveNet/model/'
output_dir = '/content/WaveNet/output/'

In [5]:
import os

import wavenet.config as config
#from wavenet.model import WaveNet
from wavenet.utils.data import DataLoader


class Trainer:
    """
    Drives WaveNet training: builds the model and the data loader, runs a
    fixed number of optimisation steps and saves the resulting weights.
    """

    def __init__(self, layer_size, stack_size,in_channels, res_channels, lr, data_dir, receptive_fields, sample_size,sample_rate, num_steps, model_dir, output_dir):
        """
        :param layer_size: forwarded to WaveNet
        :param stack_size: forwarded to WaveNet
        :param in_channels: forwarded to WaveNet and DataLoader
        :param res_channels: forwarded to WaveNet
        :param lr: learning rate, forwarded to WaveNet
        :param data_dir: forwarded to DataLoader
        :param receptive_fields: forwarded to DataLoader
        :param sample_size: forwarded to DataLoader
        :param sample_rate: forwarded to DataLoader
        :param num_steps: number of training steps performed by run()
        :param model_dir: directory the trained model is saved into
        :param output_dir: kept on the instance; not used by this class
        """
        # Keep the run settings on the instance so run() does not depend on
        # whatever notebook globals happen to exist when it is called.
        self.num_steps = num_steps
        self.model_dir = model_dir
        self.output_dir = output_dir

        self.wavenet = WaveNet(layer_size, stack_size,
                               in_channels, res_channels,
                               lr=lr)

        self.data_loader = DataLoader(data_dir, receptive_fields,
                                      sample_size, sample_rate, in_channels)

    def infinite_batch(self):
        """
        Yield (inputs, targets) pairs forever by cycling over the data loader
        :raises ValueError: when a full pass over the loader yields nothing
        """
        while True:
            produced = False

            for dataset in self.data_loader:
                for inputs, targets in dataset:
                    produced = True
                    yield inputs, targets

            # Without this guard an empty data directory would spin forever.
            if not produced:
                raise ValueError('The data loader produced no training batches.')

    def run(self):
        """
        Train for exactly ``num_steps`` steps, then save the model to ``model_dir``
        """
        total_steps = 0

        for inputs, targets in self.infinite_batch():
            loss = self.wavenet.train(inputs, targets)

            total_steps += 1

            print('[{0}/{1}] loss: {2}'.format(total_steps, self.num_steps, loss))

            # '>=' stops after num_steps steps; '>' ran one step too many.
            if total_steps >= self.num_steps:
                break

        # Make sure the target directory exists before the model is written.
        if self.model_dir:
            os.makedirs(self.model_dir, exist_ok=True)

        self.wavenet.save(self.model_dir)


def prepare_output_dir(args=None):
    """
    Create the 'log', 'model' and 'test' sub-directories of the output directory.

    :param args: either the output directory itself (str / path-like) or an
                 object exposing an ``output_dir`` attribute. When neither
                 yields a path, the notebook-level ``output_dir`` variable is
                 used, which is what the function always did before.
    :return: tuple (log_dir, model_dir, test_output_dir) of the created paths
    """
    if isinstance(args, (str, os.PathLike)):
        base_dir = args
    else:
        base_dir = getattr(args, 'output_dir', None)

    if base_dir is None:
        # Fall back to the notebook configuration.
        base_dir = output_dir

    log_dir = os.path.join(base_dir, 'log')
    model_dir = os.path.join(base_dir, 'model')
    test_output_dir = os.path.join(base_dir, 'test')

    for directory in (log_dir, model_dir, test_output_dir):
        os.makedirs(directory, exist_ok=True)

    return log_dir, model_dir, test_output_dir

In [6]:
# Build the trainer from the configuration cell; keywords make the mapping explicit.
trainer = Trainer(
    layer_size=layer_size,
    stack_size=stack_size,
    in_channels=in_channels,
    res_channels=res_channels,
    lr=lr,
    data_dir=data_dir,
    receptive_fields=receptive_fields,
    sample_size=sample_size,
    sample_rate=sample_rate,
    num_steps=num_steps,
    model_dir=model_dir,
    output_dir=output_dir,
)

In [None]:
# Run the training loop; the model is saved once the loop finishes.
trainer.run()

# Generation

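Generation needs a seed audio file. The cell below uses `helloworld.wav` from the repository's test data; an uploaded file such as `audiocheck.net_whitenoisegaussian.wav` can be used instead. The output directory for the generated audio must also exist.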

In [None]:
# Seed audio and destination of the generated file.
# Alternative seed (has to be uploaded first): '/audiocheck.net_whitenoisegaussian.wav'
seed_file = '/content/WaveNet/test/data/helloworld.wav'
out = '/content/WaveNet/output/generated.wav'

In [None]:
#seed_file='/content/drive/MyDrive/audiocheck.net_whitenoisegaussian.wav'


In [None]:
# Checkpoint step to load; 0 selects the un-suffixed 'wavenet.pkl' file.
step = 0

In [None]:
import torch
import librosa
import datetime
import numpy as np
import soundfile as sf

import wavenet.config as config
from wavenet.model import WaveNet
import wavenet.utils.data as utils



class Generator:
    """
    Generates audio with a trained WaveNet, seeded from an existing audio file,
    and writes the result to a wav file.
    """

    def __init__(self, layer_size,stack_size,in_channels,res_channels,
                 model_dir,step,sample_size,sample_rate, seed_file, out):
        """
        :param layer_size: forwarded to WaveNet
        :param stack_size: forwarded to WaveNet
        :param in_channels: forwarded to WaveNet; also used for mu-law / one-hot coding
        :param res_channels: forwarded to WaveNet
        :param model_dir: directory holding the checkpoint to load
        :param step: checkpoint step to load
        :param sample_size: seed length in samples; a falsy value selects twice the receptive field
        :param sample_rate: sample rate used to load the seed and write the output
        :param seed_file: path of the audio file used as seed
        :param out: path of the wav file to write
        """
        # Keep the settings on the instance so the methods below do not depend
        # on notebook globals being defined (and unchanged) when they run.
        self.in_channels = in_channels
        self.sample_size = sample_size
        self.sample_rate = sample_rate
        self.seed_file = seed_file
        self.out = out

        self.wavenet = WaveNet(layer_size, stack_size,
                               in_channels, res_channels)

        self.wavenet.load(model_dir, step)

    @staticmethod
    def _variable(data):
        """
        Convert a numpy array into a float tensor, on GPU when CUDA is available
        :param data: numpy array
        :return: torch.Tensor of dtype float32
        """
        # torch.autograd.Variable is deprecated; plain tensors are equivalent.
        tensor = torch.from_numpy(data).float()

        if torch.cuda.is_available():
            tensor = tensor.cuda()

        return tensor

    def _make_seed(self, audio):
        """
        Left-pad the audio with one receptive field of zeros and cut the seed from its start
        :param audio: array [timestep, channels]
        :return: array [1, seed_length, channels]
        """
        # Wrapping in a list adds the leading batch axis; only the time axis is padded.
        audio = np.pad([audio], [[0, 0], [self.wavenet.receptive_fields, 0], [0, 0]], 'constant')

        if self.sample_size:
            seed = audio[:, :self.sample_size, :]
        else:
            seed = audio[:, :self.wavenet.receptive_fields*2, :]

        return seed

    def _get_seed_from_audio(self, filepath):
        """
        Load an audio file and turn it into a seed tensor
        :param filepath: path of the audio file
        :return: (seed tensor, number of samples in the loaded audio)
        """
        audio = utils.load_audio(filepath, self.sample_rate)
        audio_length = len(audio)

        audio = utils.mu_law_encode(audio, self.in_channels)
        audio = utils.one_hot_encode(audio, self.in_channels)

        seed = self._make_seed(audio)

        return self._variable(seed), audio_length

    def _save_to_audio_file(self, data):
        """
        Decode network output and write it as a 24-bit PCM wav file
        :param data: Tensor[batch, timestep, channels]; only the first batch entry is written
        :return: duration of the written audio as reported by librosa
        """
        import os

        data = data[0].detach().cpu().numpy()
        data = utils.one_hot_decode(data, axis=1)
        audio = utils.mu_law_decode(data, self.in_channels)

        # The target directory is not created anywhere else.
        out_dir = os.path.dirname(self.out)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        sf.write(self.out, audio, self.sample_rate, 'PCM_24')
        print('Saved wav file at {}'.format(self.out))

        return librosa.get_duration(y=audio, sr=self.sample_rate)

    def generate(self):
        """
        Generate as many samples as the seed audio contains and save them
        :return: duration of the written audio as reported by librosa
        """
        outputs = None
        inputs, audio_length = self._get_seed_from_audio(self.seed_file)

        # Inference only: without no_grad the concatenated outputs would keep
        # the autograd graph of every pass alive and memory would grow.
        with torch.no_grad():
            while True:
                new = self.wavenet.generate(inputs)

                outputs = new if outputs is None else torch.cat((outputs, new), dim=1)

                print('{0}/{1} samples are generated.'.format(len(outputs[0]), audio_length))

                if len(outputs[0]) >= audio_length:
                    break

                # NOTE(review): this keeps the oldest part of the window and drops
                # the most recent len(new[0]) samples before appending the new
                # output - confirm that this is the intended sliding window.
                inputs = torch.cat((inputs[:, :-len(new[0]), :], new), dim=1)

        outputs = outputs[:, :audio_length, :]

        return self._save_to_audio_file(outputs)

In [None]:
# Build the generator from the configuration cells; this also loads the saved model.
generator = Generator(
    layer_size=layer_size,
    stack_size=stack_size,
    in_channels=in_channels,
    res_channels=res_channels,
    model_dir=model_dir,
    step=step,
    sample_size=sample_size,
    sample_rate=sample_rate,
    seed_file=seed_file,
    out=out,
)

Loading model from /content/WaveNet/model/


In [None]:
# Generate audio and write the wav file; the displayed value is the duration returned by generate().
generator.generate()

138/10429 samples are generated.
276/10429 samples are generated.
414/10429 samples are generated.
552/10429 samples are generated.
690/10429 samples are generated.
828/10429 samples are generated.
966/10429 samples are generated.
1104/10429 samples are generated.
1242/10429 samples are generated.
1380/10429 samples are generated.
1518/10429 samples are generated.
1656/10429 samples are generated.
1794/10429 samples are generated.
1932/10429 samples are generated.
2070/10429 samples are generated.
2208/10429 samples are generated.
2346/10429 samples are generated.
2484/10429 samples are generated.
2622/10429 samples are generated.
2760/10429 samples are generated.
2898/10429 samples are generated.
3036/10429 samples are generated.
3174/10429 samples are generated.
3312/10429 samples are generated.
3450/10429 samples are generated.
3588/10429 samples are generated.
3726/10429 samples are generated.
3864/10429 samples are generated.
4002/10429 samples are generated.
4140/10429 samples ar

104.29

In [None]:
# Scratch cell: displays a 5x14 array of zeros; the result is not assigned to a name.
np.zeros((5, 14), dtype=float)

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

# Bits And Pieces
Code snippets that may be useful when adapting this notebook to generate time series.

In [None]:
# Build a loader with shuffling disabled, using the settings from the
# configuration cell. The upper-case names used here before (TEST_AUDIO_DIR,
# RECEPTIVE_FIELDS, ...) are not defined anywhere in this notebook.
data_loader = DataLoader(data_dir, receptive_fields, sample_size, sample_rate, in_channels,
                         shuffle=False)

In [None]:
# Print the shape of every (input, target) pair produced by `data_loader`.
# The three lists below are only filled by the commented-out lines, so they stay empty as written.
dataset_size = []

for dataset in data_loader:
  input_size = []
  target_size = []

  for i, t in dataset:
    # Collecting variant, currently disabled in favour of printing:
    #input_size.append(i.shape)
    #target_size.append(t.shape)
    print(i.shape)
    print(t.shape)

  #dataset_size.append([input_size, target_size])

In [None]:
#inputs=torch.tensor([[[0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [0.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.],
#         [1.]]])
#targets=torch.tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#         0., 0., 0., 0.]])