# Assignment 4 - Generative Adversarial Networks
 
**Authors:**

1.   Liav Bachar 205888472
2.   Naor Kolet 205533060


# Imports

In [39]:
import pandas as pd
import numpy as np



# TensorFlow
import tensorflow as tf

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy


# Scikit-learn
from sklearn.model_selection import train_test_split
# from sklearn.metrics import confusion_matrix, accuracy_score
# from sklearn.model_selection import StratifiedKFold
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import log_loss
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
 
# Plots
# import seaborn as sns
import matplotlib.pyplot as plt

# Misc.
from scipy.io import arff
import os
import random
import joblib
import cv2
from glob import glob
from tqdm import tqdm_notebook as tqdm
import time

%matplotlib inline

In [2]:
SEED = 42
def set_seed():    
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

set_seed()

# Load Data

In [3]:
def load_arff_dataset(arff_path):
    data = arff.loadarff(arff_path)
    df = pd.DataFrame(data=data[0], columns=data[1].names())
    
    return df

In [4]:
# https://www.kaggle.com/uciml/pima-indians-diabetes-database
diab_df = load_arff_dataset(r'datasets/diabetes.arff')

In [5]:
diab_df

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,b'tested_positive'
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,b'tested_negative'
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,b'tested_positive'
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,b'tested_negative'
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,b'tested_positive'
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,b'tested_negative'
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,b'tested_negative'
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,b'tested_negative'
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,b'tested_positive'


In [6]:
# https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)
cred_df = load_arff_dataset(r'datasets/german_credit.arff')

In [7]:
cred_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
0,b'A11',6.0,b'A34',b'A43',1169.0,b'A65',b'A75',4.0,b'A93',b'A101',...,b'A121',67.0,b'A143',b'A152',2.0,b'A173',1.0,b'A192',b'A201',b'1'
1,b'A12',48.0,b'A32',b'A43',5951.0,b'A61',b'A73',2.0,b'A92',b'A101',...,b'A121',22.0,b'A143',b'A152',1.0,b'A173',1.0,b'A191',b'A201',b'2'
2,b'A14',12.0,b'A34',b'A46',2096.0,b'A61',b'A74',2.0,b'A93',b'A101',...,b'A121',49.0,b'A143',b'A152',1.0,b'A172',2.0,b'A191',b'A201',b'1'
3,b'A11',42.0,b'A32',b'A42',7882.0,b'A61',b'A74',2.0,b'A93',b'A103',...,b'A122',45.0,b'A143',b'A153',1.0,b'A173',2.0,b'A191',b'A201',b'1'
4,b'A11',24.0,b'A33',b'A40',4870.0,b'A61',b'A73',3.0,b'A93',b'A101',...,b'A124',53.0,b'A143',b'A153',2.0,b'A173',2.0,b'A191',b'A201',b'2'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,b'A14',12.0,b'A32',b'A42',1736.0,b'A61',b'A74',3.0,b'A92',b'A101',...,b'A121',31.0,b'A143',b'A152',1.0,b'A172',1.0,b'A191',b'A201',b'1'
996,b'A11',30.0,b'A32',b'A41',3857.0,b'A61',b'A73',4.0,b'A91',b'A101',...,b'A122',40.0,b'A143',b'A152',1.0,b'A174',1.0,b'A192',b'A201',b'1'
997,b'A14',12.0,b'A32',b'A43',804.0,b'A61',b'A75',4.0,b'A93',b'A101',...,b'A123',38.0,b'A143',b'A152',1.0,b'A173',1.0,b'A191',b'A201',b'1'
998,b'A11',45.0,b'A32',b'A43',1845.0,b'A61',b'A73',4.0,b'A93',b'A101',...,b'A124',23.0,b'A143',b'A153',1.0,b'A173',1.0,b'A192',b'A201',b'2'


# Preprocess Data

@attribute preg real
@attribute plas real
@attribute pres real
@attribute skin real
@attribute insu real
@attribute mass real
@attribute pedi real
@attribute age real
@attribute class {tested_negative,tested_positive}

In [13]:
diab_ct = make_column_transformer(
    (MinMaxScaler(), ['preg', 'plas', 'pres', 'skin', 'insu', 'mass', 'pedi', 'age']),
    (OrdinalEncoder(), ['class']),
    n_jobs=-1
)

In [14]:
diab_data = diab_ct.fit_transform(diab_df)
diab_x, diab_y = diab_data[:, :-1], diab_data[:, -1]
diab_x.shape, diab_y.shape

((768, 8), (768,))

@attribute 1 {A11, A12, A13, A14}
@attribute 2 numeric 
@attribute 3 {A30, A31, A32, A33, A34}
@attribute 4 {A40, A41, A42, A43, A44, A45, A46, A47, A48, A49, A410}
@attribute 5 numeric
@attribute 6 {A61, A62, A63, A64, A65}
@attribute 7 {A71, A72, A73, A74, A75}
@attribute 8 numeric
@attribute 9 {A91, A92, A93, A94, A95}
@attribute 10 {A101, A102, A103}
@attribute 11 numeric
@attribute 12 {A121, A122, A123, A124}
@attribute 13 numeric
@attribute 14 {A141, A142, A143}
@attribute 15 {A151, A152, A153}
@attribute 16 numeric
@attribute 17 {A171, A172, A173, A174}
@attribute 18 numeric
@attribute 19 {A191, A192}
@attribute 20 {A201, A202}
@attribute 21 {1,2}

In [15]:
cred_ct = make_column_transformer(
    (MinMaxScaler(), ['2', '5', '8', '11', '13', '16', '18']),
    (OneHotEncoder(), ['1', '3', '4', '6', '7', '9', '10', '12', '14', '15', '17', '19', '20']),
    (OrdinalEncoder(), ['21']),
    n_jobs=-1
)

In [16]:
cred_data = cred_ct.fit_transform(cred_df)
cred_x, cred_y = cred_data[:, :-1], cred_data[:, -1]
cred_x.shape, cred_y.shape

((1000, 61), (1000,))

# Model Architecture

In [35]:
def generator_model(noise_shape, output_shape):
    inp = Input(shape=noise_shape)
    
    X = Dense(32, activation='relu')(inp)
    X = Dense(64, activation='relu')(X)
    X = Dense(128, activation='relu')(X)
    
    out = Dense(output_shape, activation='sigmoid')(X)
    
    model = Model(inp, out)
    
    return model

In [36]:
def discriminator_model(input_shape):
    inp = Input(shape=input_shape)
    
    X = Dense(64, activation='relu')(inp)
    X = Dense(32, activation='relu')(X)
    X = Dense(16, activation='relu')(X)
    
    out = Dense(1, activation='sigmoid')(X)
    
    model = Model(inp, out)
    
    return model

In [25]:
bce = BinaryCrossentropy(from_logits=True)

def discriminator_loss(real, fake):
    real_loss = bce(tf.ones_like(real), real)
    fake_loss = bce(tf.zeros_like(fake), fake)
    total_loss = (real_loss + fake_loss) / 2
    return total_loss

def generator_loss(fake):
    return bce(tf.ones_like(fake), fake)

In [102]:
disc_opt = Adam(1e-4)
genr_opt = Adam(1e-4)
# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
# @tf.function
def train_step(samples, generator, discriminator, batch_size):
    noise = tf.random.normal([batch_size, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_samples = generator(noise, training=True)

        real_output = discriminator(samples, training=True)
        fake_output = discriminator(generated_samples, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    genr_opt.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    disc_opt.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    
    return gen_loss, disc_loss

In [103]:
def train_gan(train_data, epochs=50, noise_dim=5, batch_size=64, buffer_size=10000):
    train_dataset = tf.data.Dataset.from_tensor_slices(train_data).shuffle(buffer_size).batch(batch_size)
    
    discriminator = discriminator_model(train_data.shape[1])
    generator = generator_model(noise_dim, train_data.shape[1])

    for epoch in range(epochs):
        start = time.time()
        gen_loss = 0
        disc_loss = 0
        num_batchs = 0
        
        for sample_batch in train_dataset:
            num_batchs += 1
            curr_gen_loss, curr_disc_loss = train_step(sample_batch, generator, discriminator, batch_size=batch_size)
            gen_loss += curr_gen_loss
            disc_loss += curr_disc_loss
            
        print (f'Time for epoch {epoch + 1} is {time.time()-start :.4f} sec')
        print(f'\tGenerator loss: {gen_loss/num_batchs:.4f}  Discriminator loss: {disc_loss/num_batchs:.4f}')
        
    return generator, discriminator

In [98]:
train_gan(diab_x)

Time for epoch 1 is 0.2760 sec
	Generator loss: 0.7318  Discriminator loss: 0.6867
Time for epoch 2 is 0.2449 sec
	Generator loss: 0.7498  Discriminator loss: 0.6805
Time for epoch 3 is 0.2422 sec
	Generator loss: 0.7639  Discriminator loss: 0.6757
Time for epoch 4 is 0.2465 sec
	Generator loss: 0.7743  Discriminator loss: 0.6720
Time for epoch 5 is 0.2524 sec
	Generator loss: 0.7811  Discriminator loss: 0.6692
Time for epoch 6 is 0.2482 sec
	Generator loss: 0.7858  Discriminator loss: 0.6666
Time for epoch 7 is 0.2461 sec
	Generator loss: 0.7883  Discriminator loss: 0.6647
Time for epoch 8 is 0.2646 sec
	Generator loss: 0.7914  Discriminator loss: 0.6625
Time for epoch 9 is 0.2491 sec
	Generator loss: 0.7891  Discriminator loss: 0.6623
Time for epoch 10 is 0.2605 sec
	Generator loss: 0.7888  Discriminator loss: 0.6615
Time for epoch 11 is 0.2585 sec
	Generator loss: 0.7879  Discriminator loss: 0.6615
Time for epoch 12 is 0.2684 sec
	Generator loss: 0.7860  Discriminator loss: 0.6618
T

(<tensorflow.python.keras.engine.functional.Functional at 0x7f6ee0259eb0>,
 <tensorflow.python.keras.engine.functional.Functional at 0x7f6ee0260520>)

In [104]:
train_gan(cred_x, epochs=100)

Time for epoch 1 is 0.3696 sec
	Generator loss: 0.8320  Discriminator loss: 0.6852
Time for epoch 2 is 0.3418 sec
	Generator loss: 0.8862  Discriminator loss: 0.6476
Time for epoch 3 is 0.3247 sec
	Generator loss: 0.9215  Discriminator loss: 0.6191
Time for epoch 4 is 0.3372 sec
	Generator loss: 0.9610  Discriminator loss: 0.5928
Time for epoch 5 is 0.3485 sec
	Generator loss: 0.9953  Discriminator loss: 0.5686
Time for epoch 6 is 0.3699 sec
	Generator loss: 1.0273  Discriminator loss: 0.5457
Time for epoch 7 is 0.3765 sec
	Generator loss: 1.0469  Discriminator loss: 0.5260
Time for epoch 8 is 0.3551 sec
	Generator loss: 1.0576  Discriminator loss: 0.5081
Time for epoch 9 is 0.3581 sec
	Generator loss: 1.0622  Discriminator loss: 0.4918
Time for epoch 10 is 0.3264 sec
	Generator loss: 1.0460  Discriminator loss: 0.4836
Time for epoch 11 is 0.3501 sec
	Generator loss: 1.0316  Discriminator loss: 0.4788
Time for epoch 12 is 0.3251 sec
	Generator loss: 1.0021  Discriminator loss: 0.4799
T

(<tensorflow.python.keras.engine.functional.Functional at 0x7f6ee020d850>,
 <tensorflow.python.keras.engine.functional.Functional at 0x7f6ee018e190>)

### Forgiving Teacher

In [113]:

# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
# @tf.function
def train_step(samples, generator, discriminators, batch_size, genr_opt, disc_opt):
    noise = tf.random.normal([batch_size, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_samples = generator(noise, training=True)

        real_outputs = [disc(samples, training=True) for disc in discriminators]
        fake_outputs = [disc(generated_samples, training=True) for disc in discriminators]

        gen_loss = [generator_loss(fake_output) for fake_output in fake_outputs]
        disc_loss = [discriminator_loss(real_output, fake_output) for real_output, fake_output in zip(real_outputs, fake_outputs)]

    _ = [print(type(gl)) for gl in gen_loss]
    gen_loss = tf.reduce_mean(gen_loss)
    
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    genr_opt.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    
    for i in range(len(discriminators)):
        gradients_of_discriminator = disc_tape.gradient(disc_loss[i], discriminators[i].trainable_variables)
        disc_opt[i].apply_gradients(zip(gradients_of_discriminator, discriminators[i].trainable_variables))
    
    return gen_loss, disc_loss

In [107]:
def train_gan(train_data, disc_num=1, epochs=50, noise_dim=5, batch_size=64, buffer_size=10000):
    train_dataset = tf.data.Dataset.from_tensor_slices(train_data).shuffle(buffer_size).batch(batch_size)
    
    discriminators = [discriminator_model(train_data.shape[1]) for _ in range(disc_num)]
    generator = generator_model(noise_dim, train_data.shape[1])
    
    disc_opt = [Adam(1e-4) for _ in range(disc_num)]
    genr_opt = Adam(1e-4)

    for epoch in range(epochs):
        start = time.time()
        gen_loss = 0
        disc_loss = 0
        num_batchs = 0
        
        for sample_batch in train_dataset:
            num_batchs += 1
            curr_gen_loss, curr_disc_loss = train_step(sample_batch, generator, discriminators, batch_size, genr_opt, disc_opt)
            gen_loss += curr_gen_loss
            disc_loss += curr_disc_loss
            
        print (f'Time for epoch {epoch + 1} is {time.time()-start :.4f} sec')
        print(f'\tGenerator loss: {gen_loss/num_batchs:.4f}  Discriminator loss: {disc_loss/num_batchs:.4f}')
        
    return generator, discriminators

In [114]:
train_gan(cred_x, epochs=100)

<class 'tensorflow.python.framework.ops.EagerTensor'>


ValueError: No gradients provided for any variable: ['dense_112/kernel:0', 'dense_112/bias:0', 'dense_113/kernel:0', 'dense_113/bias:0', 'dense_114/kernel:0', 'dense_114/bias:0', 'dense_115/kernel:0', 'dense_115/bias:0'].