In [1]:
path = '/Users/jmaxi/Data/MIDI/csv music tracks/builder data/color/'
rango = 357 #la cantidad de imagenes que hay(357 en total)

#DISCRIMINATOR
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LeakyReLU

# function for building the discriminator layers
def build_discriminator(start_filters, spatial_dim, filter_size):
    
    # function for building a CNN block for downsampling the image
    def add_discriminator_block(x, filters, filter_size):
      x = Conv2D(filters, filter_size, padding='same')(x)
      x = BatchNormalization()(x)
      x = Conv2D(filters, filter_size, padding='same', strides=2)(x)
      x = BatchNormalization()(x)
      x = LeakyReLU(0.3)(x)
      return x
    
    # input is an image with shape spatial_dim x spatial_dim and 3 channels
    inp = Input(shape=(spatial_dim, spatial_dim, 3))

    # design the discrimitor to downsample the image 4x
    x = add_discriminator_block(inp, start_filters, filter_size)
    x = add_discriminator_block(x, start_filters * 2, filter_size)
    x = add_discriminator_block(x, start_filters * 4, filter_size)
    x = add_discriminator_block(x, start_filters * 8, filter_size)
    
    # average and return a binary output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1, activation='sigmoid')(x)
    return Model(inputs=inp, outputs=x)


# GENERATOR
from tensorflow.keras.layers import Conv2DTranspose, Reshape

def build_generator(start_filters, filter_size, latent_dim):
  
  # function for building a CNN block for upsampling the image
  def add_generator_block(x, filters, filter_size):
      x = Conv2DTranspose(filters, filter_size, strides=2, padding='valid')(x)
      x = BatchNormalization()(x)
      x = LeakyReLU(0.3)(x)
      return x

  # input is a noise vector 
  inp = Input(shape=(latent_dim,))#AQUI ESTA EL PROBLEMA CREO YO
  #inp = Input(shape=(32,100))

  # projection of the noise vector into a tensor with 
  # same shape as last conv layer in discriminator
  x = Dense((4 * 4 * (start_filters * 8)/2), input_dim=latent_dim)(inp) #Aqui tengo que ver que onda cone estos numeros
  x = BatchNormalization()(x)
  x = Reshape(target_shape=(2, 2, start_filters * 16))(x) #SI AQUI PONGO 4,4 ME DA UN SHAPE DE 64,64, SI PONGO 2,2 ME DA 32,32

  # design the generator to upsample the image 4x
  x = add_generator_block(x, 8 *4, filter_size)
  x = add_generator_block(x, 8 *8, filter_size)
  x = add_generator_block(x, 8 *16, filter_size)
  #x = add_generator_block(x, start_filters, filter_size)

  # turn the output into a 3D tensor, an image with 3 channels 
  x = Conv2D(3, kernel_size=6, padding='valid', activation='tanh')(x)
  return Model(inputs=inp, outputs=x)


  #Esto es para preparar el dataset
#Voy a crear una lista de IDs para transfor una lista de IDS en dataframe asi evito un error que me dice que sample no puede ser aplicado a listas
import pandas as pd

def range_list(num = rango):
    list = []
    for i in range(1,num):
        list.append(i)

    return list

def create_image_id(rango = rango):

    ID = []

    for i in range(1,rango):
        num_id = 'clasical_songs{}.png'.format(i)
        ID.append(num_id)

    useless_list = range_list()
    ID = pd.DataFrame(ID, useless_list)

    return ID

my_ids = create_image_id()


# GAN

import pandas as pd
import os
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential

# load celebrity images attributes
df_celeb = my_ids
TOTAL_SAMPLES = len(my_ids) #Aqui se guarda 

# we will downscale the images
SPATIAL_DIM = 32
# size of noise vector
LATENT_DIM_GAN = 100
# filter size in conv layer
FILTER_SIZE = 5
# number of filters in conv layer
NET_CAPACITY = 16
# batch size
BATCH_SIZE_GAN = 32
# interval for displaying generated images
PROGRESS_INTERVAL = 80 
# directory for storing generated images
ROOT_DIR = 'visualization'
if not os.path.isdir(ROOT_DIR):
    os.mkdir(ROOT_DIR)
    


def construct_models(verbose=False):
    ### discriminator
    discriminator = build_discriminator(NET_CAPACITY, SPATIAL_DIM, FILTER_SIZE)
    # compile discriminator
    discriminator.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.0002), metrics=['mae'])

    ### generator
    # do not compile generator
    generator = build_generator(NET_CAPACITY, FILTER_SIZE, LATENT_DIM_GAN)

    ### DCGAN 
    gan = Sequential()
    gan.add(generator)
    gan.add(discriminator)
    discriminator.trainable = False 
    gan.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.0002), metrics=['mae'])

    if verbose: 
        generator.summary()
        discriminator.summary()
        gan.summary()
        
    return generator, discriminator, gan
  
generator_celeb, discriminator_celeb, gan_celeb = construct_models(verbose=True)


import numpy as np
import cv2
def get_real_celebrity(df, ID, size, total = 356):
    #df needs to be a PATH
    #y ID una lista con los ID
    cur_files = ID.sample(frac=1).iloc[0:size]
    X = np.empty(shape=(size, SPATIAL_DIM, SPATIAL_DIM, 3)) 
    for i in range(0,size):
        file = cur_files.iloc[i]
        img_uri = df + file[0]
        img = cv2.imread(img_uri) #AQUI LO LEE
        img = cv2.resize(img, (SPATIAL_DIM, SPATIAL_DIM)) #AQUI LO REDIMENSIONA pero vamos a ignorarlo por ahora
        #img = np.flip(img, axis=2)
        img = img.astype(np.float32) / 127.5 - 1.0 #Tengo que ver que operacion es esta.
        X[i] = img
    return X


### TRAINING
import cv2
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
 
# number of discriminator updates per alternating training iteration
DISC_UPDATES = 1  
# number of generator updates per alternating training iteration
GEN_UPDATES = 1 

def run_training(generator, discriminator, gan, df, start_it=0, num_epochs=1000, ID = my_ids):

  # list for storing loss
    avg_loss_discriminator = []
    avg_loss_generator = []
    total_it = start_it

  # main training loop
    for epoch in range(num_epochs):

      # alternating training loop
        loss_discriminator = []
        loss_generator = []
        for it in range(200): 

          #### Discriminator training loop ####
            for i in range(DISC_UPDATES): 
              # select a random set of real images
                imgs_real = get_real_celebrity(df,ID, BATCH_SIZE_GAN, TOTAL_SAMPLES)
              # generate a set of random noise vectors
                noise = np.random.randn(BATCH_SIZE_GAN, LATENT_DIM_GAN)
              # generate a set of fake images using the generator
                imgs_fake = generator.predict(noise)
              # train the discriminator on real images with label 1
                d_loss_real = discriminator.train_on_batch(imgs_real, np.ones([BATCH_SIZE_GAN]))[1]
              # train the discriminator on fake images with label 0
                d_loss_fake = discriminator.train_on_batch(imgs_fake, np.zeros([BATCH_SIZE_GAN]))[1]

          # display some fake images for visual control of convergence
            if total_it % PROGRESS_INTERVAL == 0:
                plt.figure(figsize=(5,2))
                num_vis = min(BATCH_SIZE_GAN, 5)
                imgs_real = get_real_celebrity(df, ID, num_vis, TOTAL_SAMPLES)
                noise = np.random.randn(num_vis, LATENT_DIM_GAN)
                imgs_fake = generator.predict(noise)
                for obj_plot in [imgs_fake, imgs_real]:
                    plt.figure(figsize=(num_vis * 3, 3))
                    for b in range(num_vis):
                        disc_score = float(discriminator.predict(np.expand_dims(obj_plot[b], axis=0))[0])
                        plt.subplot(1, num_vis, b + 1)
                        plt.title(str(round(disc_score, 3)))
                        plt.imshow(obj_plot[b] * 0.5 + 0.5) 
                    if obj_plot is imgs_fake:
                        plt.savefig(os.path.join(ROOT_DIR, str(total_it).zfill(10) + '.jpg'), format='jpg', bbox_inches='tight')
                    plt.show()  

          #### Generator training loop ####
            loss = 0
            y = np.ones([BATCH_SIZE_GAN, 1]) 
            for j in range(GEN_UPDATES):
              # generate a set of random noise vectors
                noise = np.random.randn(BATCH_SIZE_GAN, LATENT_DIM_GAN)
              # train the generator on fake images with label 1
                loss += gan.train_on_batch(noise, y)[1]

          # store loss
            loss_discriminator.append((d_loss_real + d_loss_fake) / 2.)        
            loss_generator.append(loss / GEN_UPDATES)
            total_it += 1

      # visualize loss
    clear_output(True)
    print('Epoch', epoch)
    avg_loss_discriminator.append(np.mean(loss_discriminator))
    avg_loss_generator.append(np.mean(loss_generator))
    plt.plot(range(len(avg_loss_discriminator)), avg_loss_discriminator)
    plt.plot(range(len(avg_loss_generator)), avg_loss_generator)
    plt.legend(['discriminator loss', 'generator loss'])
    plt.show()

    return generator, discriminator, gan



generator_celeb, discriminator_celeb, gan_celeb = run_training(generator_celeb, 
                                                               discriminator_celeb, 
                                                               gan_celeb, 
                                                               num_epochs=30, 
                                                               df=path,
                                                               ID = my_ids)

Epoch 29


<Figure size 640x480 with 1 Axes>

In [2]:
noise = np.random.randn(BATCH_SIZE_GAN, LATENT_DIM_GAN)
imgs_fake = generator_celeb.predict(noise)

In [3]:
imgs_fake

array([[[[ 0.99982244, -0.92705566, -0.9828017 ],
         [ 0.99999183, -0.99031353,  0.81853926],
         [ 0.99994963, -0.99209136, -0.9935143 ],
         ...,
         [-0.9983881 , -0.99993384,  0.9987241 ],
         [-0.9999923 , -0.5126178 , -0.9999987 ],
         [ 0.21815246, -0.9995799 ,  0.88078004]],

        [[ 0.99985045,  0.99967206, -0.46551213],
         [ 0.99993104,  0.99238026,  0.9928679 ],
         [ 0.99997   ,  0.99997944, -0.48681632],
         ...,
         [-1.        ,  0.82462347, -0.9999994 ],
         [-1.        ,  0.9999957 , -0.99999994],
         [-0.9999931 , -0.9836405 , -0.7993042 ]],

        [[ 0.9998981 , -0.96811503, -0.986197  ],
         [ 0.9999975 , -0.9950921 ,  0.9002611 ],
         [ 0.99997705, -0.9974276 , -0.99154145],
         ...,
         [-1.        , -1.        ,  0.9996929 ],
         [-1.        , -0.99818   , -1.        ],
         [-0.9998979 , -0.99999976,  0.7507651 ]],

        ...,

        [[ 0.9992472 ,  0.99991643, -0

In [6]:
imgs_fake * 127.5 + 1.0

array([[[[ 128.47736 , -117.19959 , -124.30721 ],
         [ 128.49896 , -125.26498 ,  105.363754],
         [ 128.49358 , -125.491646, -125.67307 ],
         ...,
         [-126.29449 , -126.49156 ,  128.33733 ],
         [-126.49902 ,  -64.35877 , -126.49983 ],
         [  28.81444 , -126.44644 ,  113.29945 ]],

        [[ 128.48093 ,  128.45819 ,  -58.352795],
         [ 128.49121 ,  127.52848 ,  127.59065 ],
         [ 128.49619 ,  128.49738 ,  -61.06908 ],
         ...,
         [-126.5     ,  106.13949 , -126.49992 ],
         [-126.5     ,  128.49945 , -126.49999 ],
         [-126.499115, -124.41416 , -100.911285]],

        [[ 128.487   , -122.43467 , -124.74012 ],
         [ 128.49968 , -125.874245,  115.78329 ],
         [ 128.49707 , -126.17202 , -125.42153 ],
         ...,
         [-126.5     , -126.5     ,  128.46085 ],
         [-126.5     , -126.267944, -126.5     ],
         [-126.486984, -126.49997 ,   96.72255 ]],

        ...,

        [[ 128.40402 ,  128.48935 ,  -

Evidentemente el resultado es cualquier cosa pero lo que debo hacer ahora es ver como llega la imagen y que operacion se le aplica para normalizarla, asi pueda desnormalizarla. Esta entre -1 y 1. 