In [None]:
from tensorflow.keras import models, layers, losses, optimizers
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from scipy.io import wavfile
import os

In [None]:
path = '/kaggle/input/'
sec = 4  # length of every training chunk, in seconds
data = [[], []]  # data[0]: chunks read from folder[0]; data[1]: chunks read from folder[1]

# os.listdir returns entries in arbitrary order. Sorting makes both the folder roles
# and the file-to-file pairing identical on every run.
# NOTE(review): assumes the alphabetically first folder holds the piano recordings and
# the second the matching synth recordings, with file names that sort the same way — confirm.
folder = sorted(os.listdir(path))
piano_files = sorted(os.listdir(path + folder[0]))
synth_files = sorted(os.listdir(path + folder[1]))

for piano, syth in zip(piano_files, synth_files):
    # Read both recordings of the pair before chunking so they can be cut to the
    # same number of chunks.
    pair = []
    for i, name in enumerate((piano, syth)):
        samplerate, file = wavfile.read(path + folder[i] + '/' + name)
        pair.append((np.array(file, dtype=float), sec * samplerate))

    # Only complete chunks are kept. Taking the minimum over the pair guarantees that
    # chunk k of data[0] and chunk k of data[1] always come from the same pair and the
    # same position, even when the two files differ in length.
    n_chunks = min(samples.shape[0] // step for samples, step in pair)
    for i, (samples, step) in enumerate(pair):
        for k in range(n_chunks):
            data[i].append(samples[k * step:(k + 1) * step, :])

In [None]:
# Show the sample rate returned by the most recent wavfile.read call in the loading cell
# (the variable keeps its last value after the loop ends).
samplerate

In [None]:
# Stack each side of the paired data on its own. Every chunk collected in `data` is used
# for training; no held-out split is made.
x_train = np.asarray(data[0])
y_train = np.asarray(data[1])

# The training loop indexes both arrays with the same slice, so they must line up exactly.
# Fail here with a readable message instead of a cryptic unpacking or indexing error later.
if x_train.ndim != 3 or x_train.shape != y_train.shape:
    raise ValueError(
        f"paired chunks are inconsistent: x_train shape {x_train.shape}, "
        f"y_train shape {y_train.shape}"
    )

# (samples per chunk, channels) — the per-example input shape used by the model builders.
input_size = (x_train.shape[1], x_train.shape[2])
print(x_train.shape)

# GAN 모델 생성

In [None]:
def Generator(lr=1e-3):
    """Build, compile and summarise the 1-D convolutional generator.

    The network takes an input of shape ``input_size`` (module-level) and applies one
    plain Conv1D layer followed by a stack of Conv1D + BatchNormalization pairs. Every
    convolution uses stride 1, 'same' padding and ReLU; the last convolution has
    2 filters.

    Args:
        lr: learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    def conv_bn(tensor, filters, kernel_size):
        # One Conv1D (stride 1, same padding, ReLU) followed by batch normalisation.
        tensor = layers.Conv1D(filters=filters, kernel_size=kernel_size, strides=1,
                               padding='same', activation='relu')(tensor)
        return layers.BatchNormalization()(tensor)

    # (filters, kernel_size) for each Conv1D + BatchNormalization pair, in order.
    entry_stage = [(256, 5), (512, 5), (1024, 3), (512, 3)]
    repeated_stage = [(1024, 3), (512, 3)] * 4
    exit_stage = [(1024, 5), (512, 5), (2, 15)]

    inputs = layers.Input(shape=input_size)

    # The very first convolution is the only one without batch normalisation.
    out = layers.Conv1D(filters=128, kernel_size=15, strides=1, padding='same', activation='relu')(inputs)
    for filters, kernel_size in entry_stage + repeated_stage + exit_stage:
        out = conv_bn(out, filters, kernel_size)

    model = models.Model(inputs, out)
    model.compile(optimizer=optimizers.Adam(lr), loss=losses.binary_crossentropy, metrics=['binary_crossentropy'])

    model.summary()
    return model
Generator()

In [None]:
def discriminator(lr=1e-3):
    """Build and compile the 1-D convolutional discriminator.

    The network takes an input of shape ``input_size`` (module-level), runs it through a
    stack of ReLU Conv1D layers (three of them with stride 2), flattens the result and
    ends in a single sigmoid unit.

    Args:
        lr: learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras model.
    """
    # (filters, kernel_size, strides) for each Conv1D + BatchNormalization pair, in order.
    conv_bn_specs = [
        (256, 3, 2),
        (512, 3, 2),
        (1024, 3, 2),
        (1024, 5, 1),
        (128, 3, 1),
    ]

    inputs = layers.Input(shape=input_size)

    # First and last convolutions are not followed by batch normalisation.
    out = layers.Conv1D(filters=128, kernel_size=2, strides=1, padding='same', activation='relu')(inputs)
    for filters, kernel_size, strides in conv_bn_specs:
        out = layers.Conv1D(filters=filters, kernel_size=kernel_size, strides=strides,
                            padding='same', activation='relu')(out)
        out = layers.BatchNormalization()(out)
    out = layers.Conv1D(filters=64, kernel_size=1, strides=1, padding='same', activation='relu')(out)

    # Classification head: flatten -> dense -> single sigmoid output.
    out = layers.Flatten()(out)
    out = layers.Dense(1024, activation='relu')(out)
    out = layers.Dense(1, activation='sigmoid')(out)

    model = models.Model(inputs, out)
    model.compile(optimizer=optimizers.Adam(lr), loss=losses.binary_crossentropy, metrics=['binary_crossentropy'])

    return model
discriminator()

In [None]:
def Gan(discriminator, generator, lr=1e-3):
    """Chain the generator and the discriminator into one compiled model.

    The combined model maps an input of shape ``input_size`` (module-level) through
    ``generator`` and then through ``discriminator``.

    Side effect: ``discriminator.trainable`` is set to False on the object passed in.

    Args:
        discriminator: Keras model applied to the generator's output.
        generator: Keras model applied to the input.
        lr: learning rate for the Adam optimizer of the combined model.

    Returns:
        The combined Keras model, compiled with binary cross-entropy loss.
    """
    discriminator.trainable = False

    inputs = layers.Input(shape=input_size)
    x = generator(inputs)
    out = discriminator(x)

    gan = models.Model(inputs, out)
    # `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated and
    # rejected by newer Keras releases.
    gan.compile(optimizer=optimizers.Adam(learning_rate=lr), loss=losses.binary_crossentropy)
    return gan

# 하이퍼 파라미터 설정

In [None]:
# The assignment below rebinds the name `discriminator` from the builder function to the
# model it returns. Keep a handle on the builder while the name still refers to it, so
# that re-running this cell builds a fresh model instead of calling the previous model.
if not isinstance(discriminator, models.Model):
    build_discriminator = discriminator

generator = Generator(lr=1e-5)
discriminator = build_discriminator(lr=1e-5)
gan = Gan(discriminator, generator, lr=1e-5)

In [None]:
epochs=10
batch_size = 5
# Number of batches per epoch, derived from batch_size rather than a hard-coded divisor:
# it counts the start offsets 0, batch_size, 2*batch_size, ... that lie strictly below
# len(x_train) - batch_size.
batch_count = len(range(0, x_train.shape[0] - batch_size, batch_size))
gan_losses = list()  # mean GAN loss per epoch, filled in by the training cell

print(f"batch_count: {batch_count}")

# 모델 학습

In [None]:
for e in range(1,epochs+1):
    batch_loss = 0
    batches_seen = 0  # batches actually trained this epoch; used to average the loss
    # Start offsets stop before len(x_train) - batch_size, so every batch is full-sized
    # and the samples after the last visited offset are skipped.
    for index, i in enumerate(range(0, x_train.shape[0] - batch_size, batch_size)):
        x = x_train[i:i+batch_size, :, :]
        y = y_train[i:i+batch_size, :, :]

        # Run the current generator on the input batch to obtain the "fake" samples.
        # verbose=0 keeps predict from printing a progress bar for every batch.
        generated_synth = generator.predict(x, verbose=0)
        # "Real" samples: targets of this batch, drawn at random with replacement.
        synth_batch =y[np.random.randint(low=0,high=x.shape[0],size=batch_size), :, :]
        X = np.concatenate([synth_batch, generated_synth])

        # Discriminator targets: the first half (real) gets a softened 0.9, the second
        # half (generated) gets 0.
        y_dis=np.zeros(2*batch_size)
        y_dis[:batch_size]=0.9

        discriminator.trainable=True
        discriminator.train_on_batch(X, y_dis)

        # The combined model is trained towards the "real" label for generated samples.
        y_gen = np.ones(batch_size)

        discriminator.trainable=False
        loss = gan.train_on_batch(x, y_gen)

        batch_loss += loss
        batches_seen += 1

        if index % 15 == 0:
            print(f"Epoch {e}/{epochs} Batch:{index} loss:{loss}")

    # Average over the batches that were really processed, so the reported value is a
    # true per-batch mean regardless of how batch_count was computed elsewhere.
    if batches_seen:
        batch_loss /= batches_seen
    print(f"Epoch {e}/{epochs} loss:{batch_loss}")
    gan_losses.append(batch_loss)

# 학습 시각화

In [None]:
# Plot the mean GAN loss of each epoch. Epochs are numbered from 1 to match the training log.
fig, ax = plt.subplots()
ax.plot(range(1, len(gan_losses) + 1), gan_losses)
ax.set_title("train loss")
ax.set_xlabel("epoch")
ax.set_ylabel("GAN loss (binary cross-entropy)")
plt.show()

# 테스트 파일 생성

In [None]:
sample_count = 5

# Pick a random window of `sample_count` consecutive training chunks.
# randint's upper bound is exclusive: the +1 makes the last window selectable and keeps
# the call valid when x_train holds exactly `sample_count` chunks.
sample = np.random.randint(0, x_train.shape[0] - sample_count + 1)
test = x_train[sample:sample + sample_count, : , :]

generated_synth = generator.predict(test)

for i in range(sample_count):
    clip = np.asarray(generated_synth[i], dtype=np.float32)
    # Floating-point WAV data is expected to lie in [-1.0, 1.0]. Scale the clip down by its
    # peak when it exceeds that range; clips already inside the range are written unchanged.
    peak = np.max(np.abs(clip)) if clip.size else 0.0
    if peak > 1.0:
        clip = clip / peak
    wavfile.write('test'+str(i)+'.wav', samplerate, clip)