In [None]:
import os

import numpy as np
import tensorflow as tf

In [None]:
# Hyperparameters
learning_rate = 1e-4  # step size handed to the Adam optimizers
epochs = 5  # number of iterations of the training loop

In [None]:
def make_convlayer(X, w_name, b_name, w_shape):
    """Stride-2 convolution with bias, followed by a leaky ReLU.

    Args:
        X: input tensor passed to ``tf.nn.conv2d``.
        w_name: name of the filter variable (created or reused via ``tf.get_variable``).
        b_name: name of the bias variable.
        w_shape: shape of the filter variable; entry 3 also sizes the bias.

    Returns:
        The activated output tensor.
    """
    weight_init = tf.truncated_normal_initializer(stddev=0.02)
    bias_init = tf.constant_initializer(0)

    kernel = tf.get_variable(w_name, w_shape, initializer=weight_init)
    bias = tf.get_variable(b_name, w_shape[3], initializer=bias_init)

    # Strides of 2 along the two middle axes with 'SAME' padding.
    conv_out = tf.nn.conv2d(input=X, filter=kernel, strides=[1, 2, 2, 1], padding='SAME')
    activated = tf.nn.leaky_relu(conv_out + bias)
#     layer = tf.layers.batch_normalization(layer)
    return activated


In [None]:
def make_deconvlayer(X, w_name, b_name, w_shape):
    """Stride-2 transposed convolution with bias, followed by a leaky ReLU.

    Args:
        X: input tensor passed to ``tf.nn.conv2d_transpose``.
        w_name: name of the filter variable (created or reused via ``tf.get_variable``).
        b_name: name of the bias variable.
        w_shape: shape of the filter variable; entry 2 sizes the bias and the
            last dimension of the requested output shape.

    Returns:
        The activated output tensor, requested with axes 1 and 2 doubled
        relative to ``X``.
    """
    kernel = tf.get_variable(
        w_name, w_shape, initializer=tf.truncated_normal_initializer(stddev=0.02))
    bias = tf.get_variable(
        b_name, w_shape[2], initializer=tf.constant_initializer(0))

    # The output shape is assembled at run time from the input's dynamic shape.
    in_shape = tf.shape(X)
    batch, rows, cols = in_shape[0], in_shape[1], in_shape[2]
    target_shape = tf.stack([batch, rows * 2, cols * 2, tf.shape(kernel)[2]])

    deconv_out = tf.nn.conv2d_transpose(
        value=X,
        filter=kernel,
        strides=[1, 2, 2, 1],
        output_shape=target_shape,
        padding='SAME',
    )
    activated = tf.nn.leaky_relu(deconv_out + bias)
#     layer = tf.layers.batch_normalization(layer)
    return activated

In [None]:
def discriminator(image, scope, reuse=False):
    """Build the discriminator and return its raw logits.

    Three stride-2 conv layers are followed by a 1024-unit fully connected
    layer and a single-unit output layer.

    Args:
        image: input tensor; the first conv filter expects 1 input channel.
        scope: variable-scope name under which the variables live.
        reuse: when true, reuse the variables already created in ``scope``
            instead of creating new ones.

    Returns:
        A ``[batch, 1]`` tensor of logits. No sigmoid is applied here; the
        caller is expected to use a logits-based loss.
    """
    # Flattened size of the last conv output: 128 x 32 spatial positions with
    # 128 channels, i.e. a 1024 x 256 input halved three times.
    flat_dim = 128 * 32 * 128

    with tf.variable_scope(scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        disc1 = make_convlayer(image, 'w1', 'b1', [4, 4, 1, 32])
        disc2 = make_convlayer(disc1, 'w2', 'b2', [4, 4, 32, 64])
        disc3 = make_convlayer(disc2, 'w3', 'b3', [4, 4, 64, 128])

        # fully connected layer
        disc_f1 = tf.reshape(disc3, shape=[-1, flat_dim])
        disc_wf1 = tf.get_variable('wf1', [flat_dim, 1024],
                                   initializer=tf.truncated_normal_initializer(stddev=0.02))
        disc_bf1 = tf.get_variable('bf1', [1024], initializer=tf.constant_initializer(0))
        disc_f1 = tf.matmul(disc_f1, disc_wf1) + disc_bf1
        disc_f1 = tf.nn.leaky_relu(disc_f1)
#         disc_f1 = tf.layers.batch_normalization(disc_f1)

        # output layer: one logit per example
        disc_wf2 = tf.get_variable('wf2', [1024, 1],
                                   initializer=tf.truncated_normal_initializer(stddev=0.02))
        disc_bf2 = tf.get_variable('bf2', [1], initializer=tf.constant_initializer(0))
        disc_f2 = tf.matmul(disc_f1, disc_wf2) + disc_bf2

        return disc_f2
     
        

In [None]:
def generator(image, scope, reuse=False):
    """Build the encoder/decoder generator and return its output tensor.

    Four stride-2 conv layers are followed by four stride-2 transposed-conv
    layers whose filter shapes mirror the encoder's.

    Args:
        image: input tensor; the first conv filter expects 1 input channel.
        scope: variable-scope name under which the variables live.
        reuse: when true, reuse the variables already created in ``scope``.

    Returns:
        The output of the last transposed-conv layer.
    """
    # (weight name, bias name, filter shape) for each layer, in build order.
    encoder_specs = (
        ('enc_w1', 'enc_b1', [4, 4, 1, 32]),
        ('enc_w2', 'enc_b2', [4, 4, 32, 64]),
        ('enc_w3', 'enc_b3', [4, 4, 64, 128]),
        ('enc_w4', 'enc_b4', [4, 4, 128, 256]),
    )
    decoder_specs = (
        ('dec_w4', 'dec_b4', [4, 4, 128, 256]),
        ('dec_w3', 'dec_b3', [4, 4, 64, 128]),
        ('dec_w2', 'dec_b2', [4, 4, 32, 64]),
        ('dec_w1', 'dec_b1', [4, 4, 1, 32]),
    )

    with tf.variable_scope(scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        net = image
        # encoder layers
        for w_name, b_name, w_shape in encoder_specs:
            net = make_convlayer(net, w_name, b_name, w_shape)
        # decoder layers
        for w_name, b_name, w_shape in decoder_specs:
            net = make_deconvlayer(net, w_name, b_name, w_shape)

        return net
       

In [None]:
# Stub for loading audio and generating spectrograms (nothing is loaded yet)
def prepare_data():
    """Return the ``(train_piano, train_violin)`` pair; both are ``None`` for now."""
    return None, None

In [None]:
# Build the training graph: input placeholders plus generator/discriminator wiring.
# Size of each input example fed to the placeholders below.
height = 1024
width = 256
channels =1

# One placeholder per instrument; the leading (batch) dimension is left unspecified.
train_piano = tf.placeholder(tf.float32,[None, height,width, channels], name= 'piano_data')
train_violin = tf.placeholder(tf.float32,[None, height,width, channels], name= 'violin_data')

# train_piano= tf.random_uniform([1,256,256,1])
# train_violin= tf.random_uniform([1,256,256,1])

# Boolean phase placeholder for batch norm. Nothing in the visible cells reads it
# (the batch_normalization calls are commented out).
phase = tf.placeholder(tf.bool, name='phase')

# Variable-scope names, one per network.
GEN_PIANO = 'generator_piano'
GEN_VIOLIN = 'generator_violin'
DISC_PIANO = 'discriminator_piano'
DISC_VIOLIN = 'discriminator_violin'

# Cross-domain generation: the piano generator consumes the violin input and
# vice versa. These first calls create the generator variables.
gen_piano = generator(train_violin, GEN_PIANO)
gen_violin = generator(train_piano, GEN_VIOLIN)

# Feed each generated tensor through the opposite generator (variables reused)
# to obtain a reconstruction of the original input.
recons_violin =generator(gen_piano, GEN_VIOLIN, reuse = True)
recons_piano = generator(gen_violin, GEN_PIANO,reuse = True)

# Discriminator logits on generated tensors; these calls create the
# discriminator variables.
disc_piano_gen = discriminator(gen_piano, DISC_PIANO)
disc_violin_gen = discriminator(gen_violin, DISC_VIOLIN)

# Discriminator logits on the real inputs, reusing the same variables.
disc_piano_real = discriminator(train_piano, DISC_PIANO, reuse = True)
disc_violin_real = discriminator(train_violin, DISC_VIOLIN, reuse = True)

def _mean_sigmoid_xent(logits, make_labels):
    """Mean sigmoid cross-entropy of ``logits`` against ``make_labels(logits)``."""
    labels = make_labels(logits)
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))


# Reconstruction losses: mean squared error between each input and its reconstruction.
loss_recons_violin = tf.reduce_mean(
    tf.losses.mean_squared_error(labels=train_violin, predictions=recons_violin))
loss_recons_piano = tf.reduce_mean(
    tf.losses.mean_squared_error(labels=train_piano, predictions=recons_piano))

# Adversarial generator losses: generated samples scored against all-ones labels.
adv_gen_violin = _mean_sigmoid_xent(disc_violin_gen, tf.ones_like)
adv_gen_piano = _mean_sigmoid_xent(disc_piano_gen, tf.ones_like)

# Discriminator losses on real samples (all-ones labels).
loss_disc_violin_real = _mean_sigmoid_xent(disc_violin_real, tf.ones_like)
loss_disc_piano_real = _mean_sigmoid_xent(disc_piano_real, tf.ones_like)

# Discriminator losses on generated samples (all-zeros labels).
loss_disc_violin_gen = _mean_sigmoid_xent(disc_violin_gen, tf.zeros_like)
loss_disc_piano_gen = _mean_sigmoid_xent(disc_piano_gen, tf.zeros_like)

# Per-instrument and total discriminator loss.
loss_disc_violin = loss_disc_violin_real + loss_disc_violin_gen
loss_disc_piano = loss_disc_piano_real + loss_disc_piano_gen
loss_disc = loss_disc_violin + loss_disc_piano

# Per-instrument generator loss = adversarial term + reconstruction term.
loss_gen_violin = adv_gen_violin + loss_recons_violin
loss_gen_piano = adv_gen_piano + loss_recons_piano

# Total generator loss.
loss_gen = loss_gen_violin + loss_gen_piano

# Each optimizer only updates the variables living under its own networks' scopes.
vars_dis = [var for scope_name in (DISC_VIOLIN, DISC_PIANO)
            for var in tf.trainable_variables(scope_name)]
vars_gen = [var for scope_name in (GEN_VIOLIN, GEN_PIANO)
            for var in tf.trainable_variables(scope_name)]

disc_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
disc_trainer = disc_optimizer.minimize(loss_disc, var_list=vars_dis)
gen_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
gen_trainer = gen_optimizer.minimize(loss_gen, var_list=vars_gen)



In [None]:
# Paths of the .npy files holding the piano and violin data.
piano_data_file = "spectro_piano.npy"
violin_data_file = "spectro_violin.npy"

In [None]:
# Load both arrays from the paths defined in the previous cell.
# (The piano path variable is `piano_data_file`; `piano_train_file` was never defined.)
piano = np.load(piano_data_file)
violin = np.load(violin_data_file)

In [None]:
# Output locations. The directories are created up front so the first
# checkpoint / result save does not fail on a missing parent directory.
checkpoint_path = './model/model.ckpt'
result_dir = './result'
for out_dir in (os.path.dirname(checkpoint_path), result_dir):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Fetches and feed are the same on every iteration, so build them once.
    # Each step runs both optimizers and pulls back the generated tensors and
    # the two per-instrument generator losses.
    fetches = [disc_trainer, gen_trainer, gen_piano, gen_violin, loss_gen_piano, loss_gen_violin]
    feed = {train_piano: piano, train_violin: violin}

    for i in range(epochs):
        _, _, piano_generated, violin_generated, lp, lv = sess.run(fetches, feed_dict=feed)
        print('epoch ', i, " ", lp, " ", lv)

        # The checkpoint is overwritten every epoch; generated outputs are kept per epoch.
        saver.save(sess, checkpoint_path)
        np.save('./result/piano_epoch_' + str(i), piano_generated)
        np.save('./result/violin_epoch_' + str(i), violin_generated)
    