#  Wavegan


### **Purpose:** 
The purpose of this WaveGAN is to generate 1-second audio samples that follow the distribution of a training set of 1-second audio samples. This is achieved with a generative adversarial network in which a generator neural network and a discriminator neural network play a minimax game against one another.

### **Setup:** 
This program requires the training audio to be split into 1-second samples stored in a single folder.

#### Configure arguments for this network:
`parser = argparse.ArgumentParser()
parser.add_argument('--dataset_dir_x', dest='dataset_dir', default='formatteddata', help='path of the dataset')
parser.add_argument('--output_dir_x', dest='output_dir', default='generatedaudio', help='path for generated output')
parser.add_argument('--epoch', dest='epoch', type=int, default=200001, help='# of epoch')
parser.add_argument('--epoch_step', dest='epoch_step', type=int, default=100, help='# of epoch to decay lr')
parser.add_argument('--lamda', dest='lamda', type=int, default=10, help='gradient penalty multiplier in the discriminator loss')
parser.add_argument('--generator_learning_rate', dest='glr', type=float, default=.0001, help='generator learning rate')
parser.add_argument('--discriminator_learning_rate', dest='dlr', type=float, default=.0001, help='discriminator learning rate')
parser.add_argument('--batch_size', dest='batch_size', type=int, default=64, help='# audio samples in batch')
parser.add_argument('--num_z_dims', dest='z_dims', type=int, default=5, help='dimensions in z')
parser.add_argument('--num_critic_steps', dest='num_critic_steps', type=int, default=10, help='number of discriminator steps per generator step')
parser.add_argument('--num_seconds', dest='num_seconds', type=int, default=4, help='length of each audio sample in seconds')
parser.add_argument('--len_audio_sample', dest='a_len', type=int, default=16384*4, help='number of data points in each audio sample')
args = parser.parse_args()`


#### Instantiate and Train Wavegan

In [None]:
with tf.Session(config=tfconfig) as sess:
    # Construct the WaveGAN model on this session, then start training.
    model = wavegan(sess, args)
    model.train(args)

   ### Initialize values for Wavegan
    
    def __init__(self, sess, args):
        """Copy the run configuration onto the instance, load the audio, build the graph.

        Args:
            sess: Session object; kept as self.sess.
            args: Namespace of parsed command-line options. The attributes
                named in the tuple below are mirrored onto self under the
                same names.
        """
        self.sess = sess

        # Mirror the command-line options that the rest of the class reads.
        copied_options = (
            "batch_size", "dataset_dir", "epoch", "output_dir", "a_len",
            "glr", "dlr", "num_seconds", "z_dims", "lamda", "num_critic_steps",
        )
        for option in copied_options:
            setattr(self, option, getattr(args, option))

        # Network-building callables defined outside this method.
        self.discriminator = discriminator
        self.generator = generator

        # Load the training audio and rescale it with the project helper.
        raw_audio = nnUtils.import_audio(self.dataset_dir)
        self.x_data = util.scale_data(raw_audio)

        # Must run last: build() reads the attributes set above.
        self.build()

`build(self)` is called at the end of the Wavegan initialization (`__init__`) and assembles the architecture of the neural network.

In [8]:
    def build(self):
        """Assemble the training graph: inputs, generator, critic, penalty and losses.

        Defines, as attributes: the placeholders x (shape [None, a_len, 1])
        and z (shape [None, z_dims]); the generated batch genx; a random
        per-sample interpolation between x and genx; the gradient-norm
        penalty lag_int evaluated on that interpolation; the mean critic-score
        gap wd; the discriminator and generator losses; and the two lists of
        trainable variables used by the optimizers.
        """
        def scale_per_sample(weights, batch):
            # weights has one entry per sample. Moving the batch axis last
            # lets it broadcast against the weights; the second transpose
            # restores the original axis order.
            return tf.transpose(weights * tf.transpose(batch, [2, 1, 0]), [2, 1, 0])

        # Graph inputs.
        self.x = tf.placeholder(tf.float32, [None, self.a_len, 1])
        self.z = tf.placeholder(tf.float32, [None, self.z_dims])

        self.genx = generator(self.z, num_seconds=self.num_seconds)

        # One uniform mixing weight per sample of the real batch.
        batch_size = tf.shape(self.x)[0]
        self.rand = tf.random_uniform([batch_size], minval=0, maxval=1)
        real_part = scale_per_sample(self.rand, self.x)
        fake_part = scale_per_sample(1 - self.rand, self.genx)
        self.interp = real_part + fake_part

        # Critic on the interpolated samples. The later calls pass reuse=True,
        # so this first call is presumably the one that creates the variables.
        self.Interpolator = discriminator(self.interp, name="GAN/discriminator")
        self.c_out_int = tf.reshape(self.Interpolator, [-1, 1])
        self.c_grad_int = tf.gradients(self.c_out_int, self.interp)[0]

        # Penalise the squared deviation of each sample's gradient norm from 1.
        grad_norm = tf.norm(self.c_grad_int, ord='euclidean', axis=(1, 2))
        self.lag_int = tf.reduce_mean(tf.pow(grad_norm - 1, 2))

        # Critic scores for real and generated batches (shared variables).
        self.dx = discriminator(self.x, name="GAN/discriminator", reuse=True)
        self.dg = discriminator(self.genx, name="GAN/discriminator", reuse=True)

        # The critic maximises the score gap (minus the weighted penalty);
        # the generator minimises the same gap.
        self.wd = tf.reduce_mean(self.dx - self.dg)
        self.d_loss = (self.lamda * self.lag_int) - self.wd
        self.g_loss = self.wd

        # Split the trainable variables by name prefix.
        # NOTE(review): assumes generator() creates its variables under a
        # scope whose name starts with "gen" - confirm against its definition.
        trainable = tf.trainable_variables()
        self.d_vars = [var for var in trainable if var.name.startswith("GAN/discriminator")]
        self.g_vars = [var for var in trainable if var.name.startswith("gen")]

### Instantiate phi object and train 
`with tf.Session(config=tfconfig) as sess:
        model = phi(sess, args)
        model.train(args)`

### Set arguments in phi object
`def __init__(self, sess, args):
        self.sess = sess
        self.dataset_dir_x = args.dataset_dir_x
        self.test_dir_x = args.test_dir_x
        self.fig_output=args.fig_output
        self.epoch = args.epoch
        self.lr = args.lr
        self.dataset_dir_x = args.dataset_dir_x
        self.beta1 = args.beta1
        self.image_shape = args.image_shape
        self.batch_size = args.bs
        self.phi_network = phi_network_residual
        self.input= nnUtils.import_images(self.dataset_dir_x)
        self.input_indecies=np.load(args.indecies_file)
        self.num_bins=args.num_bins
        self.alpha=args.alpha
        self.vector_dims=args.vector_dims
        self.graph = args.graph
        self.graph_freq = args.graph_freq
        self.graph_amount=args.graph_amount
        self.plot_loss=args.plot_loss
        self.solid_shapes = args.solid_shapes
        self.plot3d=args.plot3d
        self.build()`

### Train the network

In [None]:
    def train(self, args):
        """Run the adversarial training loop and write generated audio each epoch.

        Creates one Adam optimizer per network, initialises all variables,
        then for every epoch draws random batches from self.x_data. Each batch
        gets self.num_critic_steps discriminator updates followed by one
        generator update, after which the value of self.wd is printed. At the
        end of each epoch up to ten generated clips from the last batch are
        rescaled and written to self.output_dir.

        Args:
            args: Parsed command-line namespace. Not read here (the settings
                used are attributes of self); kept so existing callers that
                pass it keep working.

        Raises:
            ValueError: If the loaded dataset contains no samples.
        """
        num_samples = self.x_data.shape[0]
        if num_samples == 0:
            raise ValueError("cannot train: the dataset contains no audio samples")

        self.d_optim = tf.train.AdamOptimizer(self.dlr) \
            .minimize(self.d_loss, var_list=self.d_vars)
        self.g_optim = tf.train.AdamOptimizer(self.glr) \
            .minimize(self.g_loss, var_list=self.g_vars)
        iteration = tf.Variable(0, dtype=tf.int32)
        increment_iter = tf.assign(iteration, iteration + 1)
        self.sess.run(tf.global_variables_initializer())

        # Batches are drawn with replacement, so a dataset smaller than one
        # batch can still train; always take at least one step per epoch so
        # fake_x is defined when the epoch's audio is written.
        steps_per_epoch = max(1, num_samples // self.batch_size)

        # Loop-invariant: add the trailing channel axis expected by self.x once.
        real_x = np.reshape(self.x_data, (num_samples, self.a_len, 1))

        # Epochs between audio dumps (1 = after every epoch).
        save_every = 1

        for i in range(self.epoch):
            for _step in range(steps_per_epoch):
                z_batch = np.random.randn(self.batch_size, self.z_dims)
                randlist_x = np.random.randint(0, num_samples, self.batch_size)
                feed = {self.x: real_x[randlist_x], self.z: z_batch}

                # Update D network.
                # NOTE(review): the same real/latent batch is reused for every
                # critic step; confirm this is intended rather than drawing a
                # fresh batch per step.
                for _critic_step in range(self.num_critic_steps):
                    self.sess.run(self.d_optim, feed_dict=feed)

                # Update G network and keep the generated batch for export.
                fake_x, _ = self.sess.run(
                    [self.genx, self.g_optim], feed_dict=feed)

                # Evaluated in a separate run so it reflects the updated generator.
                wd = self.sess.run(self.wd, feed_dict=feed)
                print("Wasserstein Disctance: " + str(wd))
            print("Iterations: %d\t" %(i))
            self.sess.run(increment_iter)
            if i % save_every == 0:
                g_batch = util.scale_data(fake_x, scale=[-32759, 32759], dtype=np.int32)
                nnUtils.write_audio(g_batch[:10], self.output_dir, i)
