# Assignment 4 - Generative Adversarial Networks
 
**Authors:**

1.   Liav Bachar 205888472
2.   Naor Kolet 205533060


# Imports

In [47]:
import pandas as pd
import numpy as np



# TensorFlow
import tensorflow as tf

from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.applications.resnet_v2 import preprocess_input as preprocess_input_res
from tensorflow.keras.applications.vgg16 import preprocess_input as preprocess_input_vgg
from tensorflow.keras.applications import VGG16, ResNet50V2, EfficientNetB4,  MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.constraints import max_norm

# Scikit-learn
from sklearn.model_selection import train_test_split
# from sklearn.metrics import confusion_matrix, accuracy_score
# from sklearn.model_selection import StratifiedKFold
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import log_loss
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
 
# Plots
# import seaborn as sns
import matplotlib.pyplot as plt

# Misc.
from scipy.io import arff
import os
import random
import joblib
import cv2
from glob import glob
from tqdm import tqdm_notebook as tqdm

%matplotlib inline

In [4]:
SEED = 42


def set_seed(seed=None):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Parameters
    ----------
    seed : int, optional
        Value handed to each generator. When omitted, the module-level
        ``SEED`` is used, so a bare ``set_seed()`` behaves as before.
    """
    if seed is None:
        # Resolved at call time so a later reassignment of SEED is honoured.
        seed = SEED
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


set_seed()

# Load Data

In [20]:
def load_arff_dataset(arff_path, decode_bytes=False):
    """Read an ARFF file into a pandas DataFrame.

    Parameters
    ----------
    arff_path : str or file-like
        Location of the ARFF file; forwarded unchanged to
        ``scipy.io.arff.loadarff``.
    decode_bytes : bool, default False
        ``loadarff`` yields nominal values as ``bytes`` objects
        (e.g. ``b'A11'``). When True, every ``bytes`` value found in an
        object-typed column is decoded to ``str`` using UTF-8. The default
        keeps the raw ``bytes`` values.

    Returns
    -------
    pandas.DataFrame
        One column per ARFF attribute, named after the attribute.
    """
    records, meta = arff.loadarff(arff_path)
    df = pd.DataFrame(data=records, columns=meta.names())

    if decode_bytes:
        # Only object columns can hold bytes; numeric columns are left alone.
        for col in df.columns[df.dtypes == object]:
            df[col] = df[col].map(
                lambda value: value.decode('utf-8') if isinstance(value, bytes) else value
            )

    return df

In [21]:
# Pima Indians Diabetes data, read from the local ARFF file.
# Source: https://www.kaggle.com/uciml/pima-indians-diabetes-database
diab_df = load_arff_dataset(r'datasets/diabetes.arff')

In [27]:
# Show the loaded diabetes frame as this cell's output.
diab_df

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,b'tested_positive'
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,b'tested_negative'
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,b'tested_positive'
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,b'tested_negative'
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,b'tested_positive'
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,b'tested_negative'
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,b'tested_negative'
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,b'tested_negative'
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,b'tested_positive'


In [21]:
# Statlog (German Credit Data), read from the local ARFF file.
# Source: https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)
cred_df = load_arff_dataset(r'datasets/german_credit.arff')

In [74]:
# Show the loaded credit frame as this cell's output.
cred_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
0,b'A11',6.0,b'A34',b'A43',1169.0,b'A65',b'A75',4.0,b'A93',b'A101',...,b'A121',67.0,b'A143',b'A152',2.0,b'A173',1.0,b'A192',b'A201',b'1'
1,b'A12',48.0,b'A32',b'A43',5951.0,b'A61',b'A73',2.0,b'A92',b'A101',...,b'A121',22.0,b'A143',b'A152',1.0,b'A173',1.0,b'A191',b'A201',b'2'
2,b'A14',12.0,b'A34',b'A46',2096.0,b'A61',b'A74',2.0,b'A93',b'A101',...,b'A121',49.0,b'A143',b'A152',1.0,b'A172',2.0,b'A191',b'A201',b'1'
3,b'A11',42.0,b'A32',b'A42',7882.0,b'A61',b'A74',2.0,b'A93',b'A103',...,b'A122',45.0,b'A143',b'A153',1.0,b'A173',2.0,b'A191',b'A201',b'1'
4,b'A11',24.0,b'A33',b'A40',4870.0,b'A61',b'A73',3.0,b'A93',b'A101',...,b'A124',53.0,b'A143',b'A153',2.0,b'A173',2.0,b'A191',b'A201',b'2'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,b'A14',12.0,b'A32',b'A42',1736.0,b'A61',b'A74',3.0,b'A92',b'A101',...,b'A121',31.0,b'A143',b'A152',1.0,b'A172',1.0,b'A191',b'A201',b'1'
996,b'A11',30.0,b'A32',b'A41',3857.0,b'A61',b'A73',4.0,b'A91',b'A101',...,b'A122',40.0,b'A143',b'A152',1.0,b'A174',1.0,b'A192',b'A201',b'1'
997,b'A14',12.0,b'A32',b'A43',804.0,b'A61',b'A75',4.0,b'A93',b'A101',...,b'A123',38.0,b'A143',b'A152',1.0,b'A173',1.0,b'A191',b'A201',b'1'
998,b'A11',45.0,b'A32',b'A43',1845.0,b'A61',b'A73',4.0,b'A93',b'A101',...,b'A124',23.0,b'A143',b'A153',1.0,b'A173',1.0,b'A192',b'A201',b'2'


# Preprocess Data

Attributes of the diabetes dataset, as declared in its ARFF header:

```
@attribute preg real
@attribute plas real
@attribute pres real
@attribute skin real
@attribute insu real
@attribute mass real
@attribute pedi real
@attribute age real
@attribute class {tested_negative,tested_positive}
```

In [62]:
# Preprocessing for the diabetes frame: standardise the eight numeric
# attributes and integer-encode the label. Transformer outputs are
# concatenated in the order listed, so the encoded 'class' column ends up
# as the last output column.
diab_ct = make_column_transformer(
    (StandardScaler(), ['preg', 'plas', 'pres', 'skin', 'insu', 'mass', 'pedi', 'age']),
    (OrdinalEncoder(), ['class']),
    n_jobs=-1
)

In [101]:
# Fit the diabetes transformer and apply it to the full frame in one pass.
diab_data = diab_ct.fit_transform(diab_df)

# The encoded label occupies the final column; everything before it is a feature.
diab_x = diab_data[:, :-1]
diab_y = diab_data[:, -1]

diab_x.shape, diab_y.shape

((768, 8), (768,))

Attributes of the German credit dataset, as declared in its ARFF header:

```
@attribute 1 {A11, A12, A13, A14}
@attribute 2 numeric
@attribute 3 {A30, A31, A32, A33, A34}
@attribute 4 {A40, A41, A42, A43, A44, A45, A46, A47, A48, A49, A410}
@attribute 5 numeric
@attribute 6 {A61, A62, A63, A64, A65}
@attribute 7 {A71, A72, A73, A74, A75}
@attribute 8 numeric
@attribute 9 {A91, A92, A93, A94, A95}
@attribute 10 {A101, A102, A103}
@attribute 11 numeric
@attribute 12 {A121, A122, A123, A124}
@attribute 13 numeric
@attribute 14 {A141, A142, A143}
@attribute 15 {A151, A152, A153}
@attribute 16 numeric
@attribute 17 {A171, A172, A173, A174}
@attribute 18 numeric
@attribute 19 {A191, A192}
@attribute 20 {A201, A202}
@attribute 21 {1,2}
```

In [76]:
# Preprocessing for the German credit frame. Transformer outputs are
# concatenated in the order listed, so the encoded target ('21') ends up as
# the last output column.
cred_ct = make_column_transformer(
    # Numeric attributes: standardise.
    (StandardScaler(), ['2', '5', '8', '11', '13', '16', '18']),
    # Nominal attributes: one indicator column per category seen during fit.
    (OneHotEncoder(), ['1', '3', '4', '6', '7', '9', '10', '12', '14', '15', '17', '19', '20']),
    # Target column: integer codes.
    (OrdinalEncoder(), ['21']),
    # OneHotEncoder produces sparse blocks, and the combined output turns into
    # a sparse matrix whenever its overall density drops below the default
    # threshold of 0.3. Always return a dense array so positional column
    # slicing on the result yields ordinary NumPy arrays.
    sparse_threshold=0,
    n_jobs=-1
)

In [100]:
# Fit the credit transformer and apply it to the full frame in one pass.
cred_data = cred_ct.fit_transform(cred_df)

# The encoded target occupies the final column; everything before it is a feature.
cred_x = cred_data[:, :-1]
cred_y = cred_data[:, -1]

cred_x.shape, cred_y.shape

((1000, 61), (1000,))

# Model Architecture