In [None]:
import numpy as np
import os
from cv2 import imread, resize
import pandas as pd

def _download_lfw_archive(url, target_dir):
    """Fetch the .tgz archive at `url` with wget, unpack it with tar, and assert `target_dir` now exists."""
    print("images not found, donwloading...")
    os.system("wget %s -O tmp.tgz" % url)
    print("extracting...")
    os.system("tar xvzf tmp.tgz && rm tmp.tgz")
    print("done")
    assert os.path.exists(target_dir)


def fetch_lfw_dataset(attrs_name = "lfw_attributes.txt",
                      images_name = "lfw-deepfunneled",
                      raw_images_name = "lfw",
                      use_raw=False,
                      dx=80,dy=80,
                      dimx=45,dimy=45
    ):
    """Load LFW face photos together with their attribute table.

    Missing inputs are downloaded with ``wget`` (and unpacked with ``tar``)
    into the current working directory before loading.

    Parameters
    ----------
    attrs_name : str
        Path of the tab-separated attributes file. It is also used as the
        file name appended to the download URL when the file is missing.
    images_name : str
        Directory holding the deep-funneled images (used when ``use_raw`` is False).
    raw_images_name : str
        Directory holding the raw images (used when ``use_raw`` is True).
    use_raw : bool
        Selects which of the two image directories is read.
    dx, dy : int
        Number of pixels cropped from each side horizontally / vertically.
        A value of 0 keeps the full extent along that axis.
    dimx, dimy : int
        Width and height passed to ``resize`` after cropping.

    Returns
    -------
    all_photos : np.ndarray of uint8
        Stacked images in RGB channel order, one per row of ``all_attrs``.
    all_attrs : pd.DataFrame
        Attribute columns, without "photo_path", "person" and "imagenum".

    Raises
    ------
    IOError
        If an image file cannot be decoded.
    AssertionError
        If a download did not produce the expected directory, or if some
        attribute rows have no matching photo.
    """
    # download if not exists
    dirname = raw_images_name if use_raw else images_name
    if not os.path.exists(dirname):
        archive = "lfw.tgz" if use_raw else "lfw-deepfunneled.tgz"
        _download_lfw_archive("http://vis-www.cs.umass.edu/lfw/" + archive, dirname)

    if not os.path.exists(attrs_name):
        print("attributes not found, downloading...")
        os.system("wget http://www.cs.columbia.edu/CAVE/databases/pubfig/download/%s"%attrs_name)
        print("done")

    # read attrs -- honour attrs_name instead of a hard-coded file name
    df_attrs = pd.read_csv(attrs_name, sep='\t', skiprows=1)
    # Shift the header left by one: drop the last parsed column and the first
    # header label so that the remaining labels line up with the values.
    df_attrs = pd.DataFrame(df_attrs.iloc[:,:-1].values, columns=df_attrs.columns[1:])
    df_attrs.imagenum = df_attrs.imagenum.astype(np.int64)

    # read photos: "<First>_<Last>_<NNNN>.jpg" -> person "First Last", imagenum NNNN
    photo_records = []
    for dirpath, _, filenames in os.walk(dirname):
        for fname in filenames:
            if not fname.endswith(".jpg"):
                continue
            name_parts = fname[:-4].replace('_',' ').split()
            photo_records.append({'person': ' '.join(name_parts[:-1]),
                                  'imagenum': int(name_parts[-1]),
                                  'photo_path': os.path.join(dirpath, fname)})

    photo_ids = pd.DataFrame(photo_records)

    # mass-merge
    # (photos now have same order as attributes)
    df = pd.merge(df_attrs, photo_ids, on=['person','imagenum'])

    assert len(df)==len(df_attrs),"lost some data when merging dataframes"

    def _load_and_prepare(path):
        """Read one image, crop dx/dy pixels from every side and resize it."""
        img = imread(path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising
            raise IOError("could not read image: %s" % path)
        height, width = img.shape[:2]
        # explicit end indices so that dx == 0 / dy == 0 keep the whole axis
        # (img[0:-0] would be an empty slice)
        img = img[dy:height - dy, dx:width - dx]
        return resize(img, (dimx, dimy))

    # image preprocessing
    all_photos = np.stack([_load_and_prepare(path) for path in df['photo_path']])
    # cv2.imread yields BGR; reverse the channel axis so the arrays are RGB,
    # which is what matplotlib's imshow expects.
    all_photos = np.ascontiguousarray(all_photos[..., ::-1]).astype('uint8')
    all_attrs = df.drop(columns=["photo_path","person","imagenum"])

    return all_photos,all_attrs

In [None]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.initializers as I
import matplotlib.pyplot as plt
import cv2 as cv
from keras.layers import Input
%matplotlib inline
from keras.layers import Conv2D,Conv2DTranspose,Dropout,LeakyReLU
from keras.layers import Flatten,Dense,BatchNormalization,Reshape
from keras.models import Sequential
from keras.models import Model,Sequential
from keras import backend as K
import keras
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Use a smaller font for subplot titles in every figure drawn below.
plt.rcParams['axes.titlesize'] = 'small'

# Load the face crops resized to 36x36 together with their attribute table.
data, attrs = fetch_lfw_dataset(dimx=36, dimy=36)

# Shape of a single image: everything after the leading sample axis.
IMG_SHAPE = data.shape[1:]

In [None]:
# Convert the image array to float32 so it can be rescaled and fed to the networks.
data = np.asarray(data, dtype=np.float32)

In [None]:
# Linearly map pixel values so that 0 -> -1 and 255 -> +1, the same range as
# the tanh output of the generator.
# NOTE: this cell is not idempotent -- running it a second time rescales the
# already-scaled array; re-run the loading cells first if you need to repeat it.
data = (data - 127.5) / 127.5

In [None]:
# `data` is scaled to [-1, 1], but imshow only renders float RGB values in
# [0, 1] and clips everything else, so map the image back to [0, 1] first.
plt.imshow((data[50] + 1.0) / 2.0);

In [None]:
# Discriminator: two stride-2 convolutions followed by a 2-way softmax.
# Output column 1 is read as "real" and column 0 as "generated" by the
# training loop and by sample_probas.
discriminator = Sequential()

# Take the input shape from the loaded data instead of repeating it by hand.
discriminator.add(Conv2D(128, (3,3), strides=(2,2), padding='same', input_shape=IMG_SHAPE))
discriminator.add(LeakyReLU(alpha=0.2))
discriminator.add(Conv2D(128, (3,3), strides=(2,2), padding='same'))
discriminator.add(LeakyReLU(alpha=0.2))

# For 36x36 inputs the two stride-2 'same' convolutions give 36 -> 18 -> 9,
# so the flattened vector has 9*9*128 = 10368 features.
discriminator.add(Flatten())
discriminator.add(Dropout(0.4))
discriminator.add(Dense(2, activation=tf.nn.softmax))

In [None]:
# Length of the latent noise vector fed to the generator.
CODE_SIZE = 256

# Generator: project the code to a 9x9x128 feature map, upsample it twice with
# stride-2 transposed convolutions, then map to 3 channels with a tanh output.
generator = Sequential([
    Dense(9*9*128, input_dim=CODE_SIZE),
    LeakyReLU(alpha=0.2),
    Reshape((9,9,128)),
    Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'),
    LeakyReLU(alpha=0.2),
    Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'),
    LeakyReLU(alpha=0.2),
    Conv2D(3, (8,8), activation='tanh', padding='same'),
])

In [None]:
def sample_noise_batch(bsize):
    """Return a (bsize, CODE_SIZE) float32 array of standard-normal noise."""
    latent = np.random.normal(size=(bsize, CODE_SIZE))
    return latent.astype('float32')

def sample_data_batch(bsize):
    """Return `bsize` images drawn uniformly, with replacement, from `data`."""
    n_images = data.shape[0]
    # An integer population is treated by np.random.choice like np.arange(n_images).
    picked = np.random.choice(n_images, size=bsize)
    return data[picked]

def sample_images(nrow,ncol, sharp=False):
    """Plot an nrow x ncol grid of images generated from fresh noise.

    Parameters
    ----------
    nrow, ncol : int
        Grid dimensions; nrow*ncol images are generated.
    sharp : bool
        If True, draw with interpolation="none"; otherwise use matplotlib's
        default interpolation.
    """
    images = generator.predict(sample_noise_batch(bsize=nrow*ncol))
    # The generator ends in tanh, so its output lies in [-1, 1]. imshow only
    # renders float RGB values in [0, 1] and clips the rest, so rescale first.
    images = np.clip((images + 1.0) / 2.0, 0.0, 1.0)
    interpolation = "none" if sharp else None
    for i in range(nrow*ncol):
        plt.subplot(nrow,ncol,i+1)
        # no cmap: colormaps are ignored for RGB input
        plt.imshow(images[i], interpolation=interpolation)
    plt.show()

def sample_probas(bsize):
    """Plot histograms of the discriminator's "real" probability.

    Two histograms over [0, 1] are drawn: one for `bsize` real images (D(x))
    and one for `bsize` generated images (D(G(z))).
    """
    plt.title('Generated vs real data')
    # The discriminator ends in a softmax, so its outputs are already
    # probabilities; applying exp would push every value outside [0, 1].
    p_real = discriminator.predict(sample_data_batch(bsize))[:,1]
    p_generated = discriminator.predict(generator.predict(sample_noise_batch(bsize)))[:,1]
    plt.hist(p_real, label='D(x)', alpha=0.5, range=[0,1])
    plt.hist(p_generated, label='D(G(z))', alpha=0.5, range=[0,1])
    plt.legend(loc='best')
    plt.show()

In [None]:
# Two independent Adam optimizers with identical settings, one per network,
# so that each keeps its own moment estimates.
disc_optimizer, gen_optimizer = (
    tf.optimizers.Adam(learning_rate=0.0002, beta_1=0.5) for _ in range(2)
)

In [None]:
from IPython import display
from tqdm import tnrange

# Alternating GAN training. Each outer iteration performs 2 discriminator
# updates on one fixed batch of real images and noise, followed by 10
# generator updates on freshly sampled noise.
for epoch in tnrange(20000):

    real_data = sample_data_batch(100)
    noise = sample_noise_batch(100)

    ########################
    #discriminator training#
    ########################
    for _ in range(2):
        # The forward pass and the loss must be computed inside the tape
        # context so that they are recorded for differentiation.
        with tf.GradientTape() as tape:
            # training=True so that the discriminator's Dropout layer is active
            p_real = discriminator(real_data, training=True)
            generated_data = generator(noise, training=True)
            p_gen = discriminator(generated_data, training=True)

            # The discriminator outputs softmax probabilities
            # (column 1 = real, column 0 = generated).
            # Loss: -(log D(x) + log(1 - D(G(z)))), i.e. a sum of logs,
            # not the log of a sum.
            d_loss = -tf.reduce_mean(tf.math.log(p_real[:, 1]) + tf.math.log(p_gen[:, 0]))

            #regularize
            d_loss += tf.reduce_mean(discriminator.layers[-1].kernel**2)

        #optimize
        gradients = tape.gradient(d_loss, discriminator.trainable_weights)
        disc_optimizer.apply_gradients(zip(gradients, discriminator.trainable_weights))

    print("discriminator loss", float(d_loss))

    ########################
    ###generator training###
    ########################
    for _ in range(10):
        with tf.GradientTape() as tape:
            # sample_noise_batch keeps the noise float32 and tied to CODE_SIZE
            generated_data = generator(sample_noise_batch(100), training=True)
            p_gen = discriminator(generated_data, training=True)
            # the generator wants its samples to be classified as real
            g_loss = -tf.reduce_mean(tf.math.log(p_gen[:, 1]))

        # Differentiate and step outside the tape context so that these
        # operations are not recorded themselves.
        gradients = tape.gradient(g_loss, generator.trainable_variables)
        gen_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))

    print("generator loss", float(g_loss))

    if epoch %100==0:
        display.clear_output(wait=True)
        sample_images(2,3,True)
        sample_probas(1000)