# **Library**

In [None]:
import os
import matplotlib.pylab as plt
import librosa
import numpy as np
import pickle
import glob
import time
import random

import keras
from keras.layers import Input, Conv1D, LeakyReLU, Dense, Flatten, Dropout, Reshape, UpSampling1D, ReLU, Activation
from keras.models import load_model, Model
from keras.optimizers import Adam, RMSprop

In [None]:
# Project root (presumably on a mounted Google Drive); passed to os.chdir in the next cell.
BASE_PATH = 'drive/My Drive/Colab Notebooks/GAN'
# Directory of the original recordings that the pre-processing cells list and load.
DATA_RAW_PATH = 'data_raw/'
# Directory the fixed-length .wav chunks are written to and later globbed from.
DATA_PATH = 'data/'

In [None]:
# Make the project root the working directory so the relative paths above resolve.
os.chdir(BASE_PATH)

In [None]:
# Display the working directory to confirm the chdir above took effect.
os.getcwd()

'/content/drive/My Drive/Colab Notebooks/GAN'

# **Pre-processing**

In [None]:
# Sound categories; each name is used as a file-name prefix when globbing chunks
# and as the base name of the per-kind pickle file.
DATA_KIND = ['rain', 'wind', 'bonfire', 'underwater', 'river', 'forest', 'wave']

## Split ambient sounds into 5-second chunks

In [None]:
# Every entry in the raw-data directory; the splitting cell iterates over this list.
data_list = os.listdir(DATA_RAW_PATH)
print(data_list)

['rain_4.mp3', 'rain_3.mp3', 'wave_1.mp3', 'rain_2.mp3', 'rain_1.mp3', 'bonfire_1.mp3', 'rain_5.mp3', 'underwater_1.mp3', 'rain_6.mp3', 'river_1.mp3', 'forest_6.mp3', 'forest_1.mp3', 'forest_2.mp3', 'wind_1.mp3', 'forest_3.mp3', 'wind_2.mp3', 'rain_7.mp3', 'wind_3.mp3', 'forest_4.mp3', 'forest_5.mp3', 'wave_3.mp3', 'wave_2.mp3', 'underwater_2.mp3', 'river_3.mp3', 'river_2.mp3']


In [None]:
# Cell-local import: `librosa.output.write_wav` was removed in librosa 0.8, so the
# chunks are written with scipy's WAV writer instead.
from scipy.io import wavfile

count = 0       # running index over every chunk written, shared across all recordings
sound_len = 5   # chunk length in seconds
max_num = 100   # upper bound on the number of chunks taken from one recording

for data in data_list:
    print(f"now processing {data}...")
    # File names look like '<kind>_<n>.mp3'; the prefix is the sound kind.
    kind = data.split('_')[0]

    wav, sr = librosa.load(DATA_RAW_PATH + data, sr=16000)
    chunk_samples = sr * sound_len

    # Whole seconds of audio. Deliberately NOT named `time`: that would rebind the
    # imported `time` module and break `time.time()` in the training cell.
    duration_sec = wav.shape[0] // sr
    # Number of complete chunks, capped at max_num (a trailing partial chunk is dropped).
    loop = min(duration_sec // sound_len, max_num)

    for i in range(loop):
        count += 1
        chunk = wav[i * chunk_samples:(i + 1) * chunk_samples]
        wavfile.write(DATA_PATH + f'{kind}_{count}.wav', sr, chunk)
    print(f"complete {count} count!")


now processing rain_4.mp3...
complete 51 count!
now processing rain_3.mp3...
complete 83 count!
now processing wave_1.mp3...
complete 119 count!
now processing rain_2.mp3...
complete 172 count!
now processing rain_1.mp3...
complete 224 count!
now processing bonfire_1.mp3...
complete 324 count!
now processing rain_5.mp3...
complete 424 count!
now processing underwater_1.mp3...
complete 524 count!
now processing rain_6.mp3...
complete 624 count!
now processing river_1.mp3...
complete 724 count!
now processing forest_6.mp3...
complete 824 count!
now processing forest_1.mp3...
complete 924 count!
now processing forest_2.mp3...
complete 1024 count!
now processing wind_1.mp3...
complete 1124 count!
now processing forest_3.mp3...
complete 1224 count!
now processing wind_2.mp3...
complete 1324 count!
now processing rain_7.mp3...
complete 1424 count!
now processing wind_3.mp3...
complete 1524 count!
now processing forest_4.mp3...
complete 1624 count!
now processing forest_5.mp3...
complete 1724

### Get random chunks from one particular recording

In [None]:
# Cell-local import: `librosa.output.write_wav` was removed in librosa 0.8, so the
# chunks are written with scipy's WAV writer instead.
from scipy.io import wavfile

wav, sr = librosa.load(DATA_RAW_PATH + 'bonfire_2.mp3', sr=16000)
sound_len = 5  # chunk length in seconds
chunk_samples = sr * sound_len

# Whole seconds of audio. Deliberately NOT named `time`, so the imported `time`
# module stays usable by the training cell.
duration_sec = wav.shape[0] // sr
loop = duration_sec // sound_len  # number of complete chunks available
if loop < 1:
    # random.randrange would raise an unhelpful "empty range" ValueError below.
    raise ValueError(f"recording is shorter than one {sound_len}s chunk")

count = 200  # number of chunks to draw; positions are sampled with replacement
for i in range(count):
    num = random.randrange(loop)
    chunk = wav[num * chunk_samples:(num + 1) * chunk_samples]
    wavfile.write(DATA_PATH + f'bonfire_part_{i}.wav', sr, chunk)

In [None]:
# Check how many chunk files exist for one kind (here: names starting with 'wave').
data_list = glob.glob(DATA_PATH + 'wave*')
print(len(data_list))

500


## Stack the chunks of each kind into one array and save it as a pickle

In [None]:
# For every sound kind: load all of its chunk files, stack them into a single
# (N, samples) array and store that array as '<kind>.pickle'.
for kind in DATA_KIND:
    data_iter = glob.glob(DATA_PATH + kind + '*')
    print(len(data_iter))
    if not data_iter:
        # Fail with a clear message instead of an AttributeError on `None.shape`.
        raise FileNotFoundError(f"no chunk files match {DATA_PATH + kind + '*'}")

    # Collect the rows first and concatenate once; concatenating inside the loop
    # re-copies the growing array on every iteration (quadratic in the file count).
    rows = []
    for data in data_iter:
        wav, sr = librosa.load(data, sr=16000)
        rows.append(np.array(wav).reshape(1, -1))

    data_array = np.concatenate(rows, axis=0)  # (N, 80000) for 5 s chunks at 16 kHz
    print(data_array.shape)

    with open(kind + '.pickle', 'wb') as f:
        pickle.dump(data_array, f)



500
(500, 80000)
500
(500, 80000)
500
(500, 80000)
500
(500, 80000)
500
(500, 80000)
600
(600, 80000)
500
(500, 80000)


## Load data

In [None]:
# Read every per-kind pickle back into memory, keyed by the kind name.
# NOTE: pickle.load must only be used on files this notebook produced itself.
data_dict = {}
for kind in DATA_KIND:
    pickle_path = kind + '.pickle'
    with open(pickle_path, 'rb') as handle:
        data_dict[kind] = pickle.load(handle)

In [None]:
# Report each kind followed by the number of clips loaded for it.
for kind in data_dict:
    print(kind)
    print(len(data_dict[kind]))

rain
500
wind
500
bonfire
500
underwater
500
river
500
forest
600
wave
500


### Integrate whole data

In [None]:
# Merge the clips of every kind into one training array with a single
# concatenate call (no None sentinel, no repeated re-copying).
train_data = np.concatenate(list(data_dict.values()), axis=0)

# Add a trailing channel axis: (N, samples) -> (N, samples, 1), the shape the
# Conv1D discriminator input expects.
train_data = np.expand_dims(train_data, axis=2)
np.random.shuffle(train_data)  # in-place shuffle along the first (clip) axis
print(train_data.shape)

(3600, 80000, 1)


In [None]:
# Re-check the shape of the merged training array.
print(train_data.shape)

(3600, 80000, 1)


### Integrate some data

In [None]:
# Train on a subset of sound kinds only; edit `targets` to pick other kinds.
targets = ['wind', 'wave', 'forest']

# One concatenate over the selected arrays instead of growing the array step by step.
train_data = np.concatenate([data_dict[target] for target in targets], axis=0)

# Add a trailing channel axis: (N, samples) -> (N, samples, 1).
train_data = np.expand_dims(train_data, axis=2)
np.random.shuffle(train_data)  # in-place shuffle along the first (clip) axis
print(train_data.shape)

(1600, 80000, 1)


# GAN Model

In [None]:
# Length of the noise vector: sets the generator/GAN input shape and the size of
# the random z vectors sampled during training.
latent_dim = 64

## Generator

In [None]:
def create_generator():
    """Build the generator: a latent vector mapped to an (80000, 1) waveform.

    Reads the module-level ``latent_dim`` for the input size. The sequence
    length grows 1250 -> 5000 -> 20000 -> 80000 through three x4 upsampling
    stages, and the final ``tanh`` keeps samples within [-1, 1].

    Returns:
        An uncompiled keras ``Model`` from the latent input to the waveform.
    """
    latent_in = Input(shape=(latent_dim, ))

    # Project the latent vector and fold it into a (1250, 64) sequence.
    h = Dense(1250 * 64)(latent_in)
    h = ReLU()(h)
    h = Reshape((1250, 64))(h)

    h = Conv1D(128, 5, padding='same')(h)
    h = ReLU()(h)

    # Three identical stages, each multiplying the length by 4 (128 channels).
    for _ in range(3):
        h = UpSampling1D(size=4)(h)
        h = Conv1D(128, 4, padding='same')(h)
        h = ReLU()(h)

    # Widen to 256 channels at full resolution: (80000, 256).
    h = Conv1D(256, 4, padding='same')(h)
    h = ReLU()(h)

    # Collapse to a single channel: (80000, 1).
    h = Conv1D(1, 4, padding='same')(h)
    waveform = Activation('tanh')(h)

    return Model(latent_in, waveform)


# Build the generator once and print its layer-by-layer summary.
generator = create_generator()
generator.summary()



Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 64)]              0         
_________________________________________________________________
dense_2 (Dense)              (None, 80000)             5200000   
_________________________________________________________________
re_lu_6 (ReLU)               (None, 80000)             0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 1250, 64)          0         
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 1250, 128)         41088     
_________________________________________________________________
re_lu_7 (ReLU)               (None, 1250, 128)         0         
_________________________________________________________________
up_sampling1d_3 (UpSampling1 (None, 5000, 128)        

## Discriminator

In [None]:
def create_discriminator():
    """Build the discriminator: an (80000, 1) waveform mapped to a score in (0, 1).

    Four stride-4 convolutions (kernel 4, no padding argument, so the Keras
    default applies) shrink the sequence 80000 -> 20000 -> 5000 -> 1250 -> 312
    with 128, 128, 128 and 256 filters respectively. The result is flattened,
    passed through dropout and reduced to one sigmoid unit.

    Returns:
        An uncompiled keras ``Model`` from the waveform input to the score.
    """
    waveform_in = Input(shape=(80000, 1))

    h = waveform_in
    for n_filters in (128, 128, 128, 256):
        h = Conv1D(n_filters, 4, strides=4)(h)
        h = LeakyReLU()(h)

    h = Flatten()(h)
    h = Dropout(0.6)(h)  # adds randomness to the discriminator
    score = Dense(1, activation='sigmoid')(h)

    return Model(waveform_in, score)

# Build the discriminator and print its layer-by-layer summary.
discriminator = create_discriminator()
discriminator.summary()

# Compile it stand-alone so it can be trained directly with train_on_batch.
discriminator_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 80000, 1)]        0         
_________________________________________________________________
conv1d_16 (Conv1D)           (None, 20000, 128)        640       
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 20000, 128)        0         
_________________________________________________________________
conv1d_17 (Conv1D)           (None, 5000, 128)         65664     
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 5000, 128)         0         
_________________________________________________________________
conv1d_18 (Conv1D)           (None, 1250, 128)         65664     
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 1250, 128)        

## GAN

In [None]:
# Freeze the discriminator before it is wired into the combined model, so that
# training `gan` is meant to update the generator only.
# NOTE(review): this relies on Keras capturing `trainable` at compile time (the
# stand-alone discriminator was compiled earlier) — confirm for the Keras version in use.
discriminator.trainable = False

# Combined model: latent vector -> generator -> discriminator -> real/fake score.
gan_input = Input(shape=(latent_dim, ))
gan_output = discriminator(generator(gan_input))
gan = Model(gan_input, gan_output)

# Same optimizer settings as the discriminator, but a separate optimizer instance.
gan_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')


## Load Model

In [None]:
# Directory holding saved weight files. Bound here because this cell comes before
# the training-configuration cell that assigns the same value; without it a
# top-to-bottom run fails with NameError.
PATH_WEIGHT = 'weight/'

# Resume from a previously saved checkpoint of the combined model.
gan.load_weights(PATH_WEIGHT + 'gan_wind_wave2.h5')

# Train

In [None]:
# Training configuration.
epoch = 300                    # the training loop runs steps 0..epoch inclusive
batch_size = 10                # real clips per batch (an equal number of fakes is generated)
PATH_RESULT = 'result_part/'   # directory for generated audio samples
PATH_WEIGHT = 'weight/'        # directory for saved model weights

In [None]:
# Cell-local imports.
# - `time`: guarantees the name refers to the module even if an earlier cell
#   rebound it to a number.
# - `wavfile`: `librosa.output.write_wav` was removed in librosa 0.8.
import time
from scipy.io import wavfile

print('Training start!')
start_time = time.time()  # wall-clock start of the current epoch
data_num = len(train_data)
if data_num == 0:
    raise ValueError('train_data is empty; run a data-integration cell first')

for step in range(0, 1 + epoch):

    # One pass over the training set; the last batch may be smaller than batch_size.
    for start in range(0, data_num, batch_size):
        real_sounds = train_data[start:start + batch_size]
        data_size = len(real_sounds)

        # --- Discriminator update: a half-fake, half-real batch ---
        random_z_vectors = np.random.normal(size=(data_size, latent_dim))
        generated_sounds = generator.predict(random_z_vectors)

        fake_real_sounds = np.concatenate((generated_sounds, real_sounds), axis=0)  # (2 * data_size, 80000, 1)

        labels = np.concatenate((np.zeros((data_size, 1)), np.ones((data_size, 1))))  # 0: fake, 1: real
        labels += 0.05 * np.random.random(labels.shape)  # add label noise

        discriminator.trainable = True  # discriminator training on
        d_loss = discriminator.train_on_batch(fake_real_sounds, labels)

        # --- Generator update: fresh noise, labelled "real" to fool the discriminator ---
        random_z_vectors = np.random.normal(size=(data_size, latent_dim))
        fake_labels = np.ones((data_size, 1))

        discriminator.trainable = False  # discriminator training off
        g_loss = gan.train_on_batch(random_z_vectors, fake_labels)

    # Save the model weights after every epoch (the same file is overwritten).
    gan.save_weights(PATH_WEIGHT + 'gan_wind_wave_forest1.h5')

    print('-' * 15)
    print('step: ', step)
    print('d_loss: ', d_loss)
    print('g_loss: ', g_loss)

    # Save one generated clip every 50 epochs.
    if step % 50 == 0:
        # Volume up by 1.5x on a copy, clipped so the boosted tanh output stays
        # within the valid [-1, 1] range of a float WAV.
        chunk_fake = np.clip(generated_sounds[0].reshape((-1)) * 1.5, -1.0, 1.0)
        wavfile.write(PATH_RESULT + f'fake_wind_wave_forest1_{step}.wav', 16000, chunk_fake)

    # Report how long this epoch took (the message used to say "step 100" every time).
    run_time = time.time() - start_time
    if run_time > 60:
        minutes, seconds = divmod(run_time, 60)
        print(f'Running time in step {step}: {int(minutes)}m {seconds}s')
    else:
        print(f'Running time in step {step}: {run_time}s')

    start_time = time.time()  # restart the timer for the next epoch



Training start!
---------------
step:  0
d_loss:  0.6717292666435242
g_loss:  0.6885030269622803
Running time in step 100: 8.0m 25.326019525527954s
---------------
step:  1
d_loss:  0.6529699563980103
g_loss:  0.7119371294975281
Running time in step 100: 1.0m 54.633957862854004s
---------------
step:  2
d_loss:  0.6234095692634583
g_loss:  0.6660753488540649
Running time in step 100: 1.0m 55.324525356292725s
---------------
step:  3
d_loss:  0.7028738856315613
g_loss:  0.6551417708396912
Running time in step 100: 1.0m 55.80712032318115s
---------------
step:  4
d_loss:  0.6854034066200256
g_loss:  0.6624038815498352
Running time in step 100: 1.0m 55.30296778678894s
---------------
step:  5
d_loss:  0.6678317785263062
g_loss:  0.5986063480377197
Running time in step 100: 1.0m 54.99700355529785s
---------------
step:  6
d_loss:  0.6608815789222717
g_loss:  0.6925750374794006
Running time in step 100: 1.0m 55.877877950668335s
---------------
step:  7
d_loss:  0.6243484616279602
g_loss:  0