### Import modules

In [None]:
from glob import glob
import os, sys
from os.path import join, dirname

import datetime, time
import csv
from glob import glob
import chardet
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler, Normalizer, MaxAbsScaler, MinMaxScaler

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.python.client import device_lib
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Flatten, Dense, Conv2D, SimpleRNN, LSTM, GRU, Reshape, RepeatVector, Conv2DTranspose, Activation, GlobalAveragePooling1D
from tensorflow.keras.layers import MaxPooling1D, MaxPooling2D, Bidirectional, TimeDistributed,  Attention, BatchNormalization, Dropout, Lambda, Conv1D
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.optimizers import Adadelta, RMSprop,SGD,Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import model_to_dot

from IPython.display import SVG

import imblearn

import matplotlib.pyplot as plt
%matplotlib inline

# Print the installed TensorFlow version, then list the local devices TensorFlow can see.
print(tf.__version__)
device_lib.list_local_devices()

### Global Parameter

In [None]:
# Width of the one-hot label vector used when bi_class == 0.
# NOTE(review): the label map below lists eight ids (0-7) while num_classes is 7 - confirm which is right.
num_classes = 7                           # {"0" : "Playing", "1" : "Talking", "2" : "Petting", "3" : "TV / Radio", "4" : "Eating / Cooking", "5" : "Moved It", "6" : "None of the above", "7" : "Other"}
time_offset = 5                          # 5-second window: 50, 10-second window: 90, 15-second window: 128 (presumably the matching window_size values - confirm)
# Number of rows per sliding window.
window_size = 50
# Consecutive windows start window_size * overlap_ratio rows apart.
overlap_ratio = 0.5
# 0 keeps the multi-class labels; k > 0 relabels class k-1 as 1 and every other class as 0.
bi_class = 1                             # Binary Classification (1 : Playing or not, 2 : Talking or not, 3 : Petting or not, 4: TV / Radio or not, 5 : Eating / Cooking or not, 6 : Moved It or not)
# cross_val and rand_st are not referenced by the visible part of this notebook.
cross_val = 0
rand_st=2
mode = 0                                 # Split data {0: Didn't split, 1: US only, 2: Korea only, 3: train with US and test with Korea 4: train with Korea and test with US}

### Load Data

In [None]:
# Path of the preprocessed CSV, relative to the notebook's working directory.
data_fname = '../Data/Preprocessed(new)/preprocessed_data(New collar_2).csv'

### Preprocess Data

In [None]:
# Read the preprocessed CSV and replace every missing value with 0.
data = pd.read_csv(data_fname).fillna(0)

In [None]:
# Bucket the numeric 'iaq' reading of every row into a categorical label.
iaq = data['iaq']
iaq_cat = []

for num in iaq:
    if 0 <= num < 50:
        iaq_cat.append('Good')
    elif 50 <= num < 100:
        iaq_cat.append('Average')
    elif 100 <= num < 150:
        iaq_cat.append('Little bad')
    elif 150 <= num < 200:
        iaq_cat.append('Bad')
    elif 200 <= num < 300:
        iaq_cat.append('Worse')
    elif 300 <= num <= 500:
        iaq_cat.append('Very bad')
    else:
        # Out-of-range reading (negative or above 500): report it, but still
        # record a placeholder so iaq_cat keeps one entry per row. Without it
        # the column assignment below fails with a length mismatch.
        # pandas.get_dummies treats None as missing, so such a row simply gets
        # no IAQ category flag set.
        print(num)
        iaq_cat.append(None)
data['iaq_cat'] = iaq_cat

In [None]:
# One-hot encode the sound, orientation and IAQ categories, appending the
# indicator columns to the right of the frame in that order.
for cat_col in ('sound category', 'orientation_cat', 'iaq_cat'):
    indicator_cols = pd.get_dummies(data[cat_col])
    data = pd.concat([data, indicator_cols], axis=1)

In [None]:
# Collect the distinct RowID values in first-seen order, then convert the
# DataFrame to a NumPy record array (the following cells index it by field name).
unique_row_ids = data['RowID'].drop_duplicates()
rowID_list = np.array(unique_row_ids)
data = data.to_records(index=False)

In [None]:
# Normalize Data: rescale each continuous sensor column independently.

scaler = StandardScaler()

# The single scaler object is refit for every column, so after this cell it
# only holds the statistics of the last column processed ('pressure').
for col_name in ('gasResistance', 'staticIaq', 'co2Equivalent', 'breathVocEquivalent',
                 'audioLevel', 'rawTemp', 'rawHumidity', 'pressure'):
    column_2d = data[col_name].reshape(-1, 1)
    data[col_name] = scaler.fit_transform(column_2d).reshape(-1)

In [None]:
# Split US and Korea: a RowID whose first character is '1' goes to Korea,
# every other RowID goes to the US. Skipped entirely when mode == 0.
us_rowIDs = []
korea_rowIDs = []

if mode != 0:
    for rowid in rowID_list:
        bucket = korea_rowIDs if rowid[0] == '1' else us_rowIDs
        bucket.append(rowid)

In [None]:
# Earlier, wider feature selection - kept for reference only, not used:
# feature_col_name = ['accX', 'accY', 'accZ', 'chord', 'orientation', 'ir', 'full', 'iaq', 'iaqAccuracy', 'rawTemp',
#                     'pressure', 'rawHumidity', 'gasResistance', 'compGasAccuracy', 'gasPercentageAccuracy', 'temperature', 
#                     'humidity', 'staticIaq', 'statIaqAccuracy', 'co2Equivalent', 'co2Accuracy', 'breathVocEquivalent', 
#                     'breathVocAccuracy', 'audioLevel', 'Loud', 'Moderate', 'Quiet']
# Columns used as model input: sensor readings followed by one-hot indicator columns.
# 'Average' ... 'Worse' are the IAQ category labels; 'Loud'/'Moderate'/'Quiet' and the
# 'Landscape ...'/'Portrait ...' names are presumably the sound and orientation
# category values - verify against the CSV.
feature_col_name = ['accX', 'accY', 'accZ', 'chord', 'full', 'iaq', 'rawTemp',
                    'pressure', 'rawHumidity', 'gasResistance', 'staticIaq', 'co2Equivalent', 'breathVocEquivalent', 
                    'audioLevel', 'Loud', 'Moderate', 'Quiet', 'Landscape Left Back', 'Landscape Left Front', 'Landscape Right Back',
                    'Landscape Right Front', 'Portrait Down Back', 'Portrait Down Front', 'Portrait Up Back', 
                    'Portrait Up Front', 'Average', 'Bad', 'Good', 'Little bad', 'Very bad', 'Worse']
# Column holding the class label.
target_col_name = ['Modality_cat']

In [None]:
# Number of selected feature columns (31 for the list defined above).
feature_num = len(feature_col_name)

In [None]:
X = []
us_X = []
korea_X = []

Y = []
us_Y = []
korea_Y = []

# Decide which RowID collections feed which output lists:
# with a region split (mode != 0) US and Korea are collected separately,
# otherwise every RowID goes into X / Y.
if mode != 0:
    collection_plan = [(us_rowIDs, us_X, us_Y), (korea_rowIDs, korea_X, korea_Y)]
else:
    collection_plan = [(rowID_list, X, Y)]

for row_ids, feature_out, target_out in collection_plan:
    for rowID in row_ids:
        # All rows recorded under this RowID form one sample.
        tmp_data = data[data['RowID'] == rowID]
        # Feature matrix: one row per record, one column per selected feature.
        feature = np.array(tmp_data[feature_col_name].tolist())
        # Label: the target value of the first row of the group.
        target = np.array(tmp_data[target_col_name][0][0].tolist())
        feature_out.append(feature)
        target_out.append(target)

In [None]:
if bi_class != 0:
    # Turn the multi-class labels into one-vs-rest labels, in place:
    # class (bi_class - 1) becomes 1, every other class becomes 0.
    label_lists = (us_Y, korea_Y) if mode != 0 else (Y,)
    for labels in label_lists:
        for idx, label in enumerate(labels):
            labels[idx] = 1 if label == bi_class-1 else 0

In [None]:
def X_preprocess(X, window_size, overlap_ratio):
    """Cut every sequence in ``X`` into sliding windows.

    Consecutive windows start ``window_size * overlap_ratio`` rows apart.
    When the sequence length is a multiple of the (integer) stride, only
    full windows are produced. Otherwise windows are produced until one
    would run past the end; that one is replaced by the last
    ``window_size`` rows of the sequence (or the whole sequence if it is
    shorter than ``window_size``).

    Args:
        X: sequence of per-sample row sequences (anything sliceable).
        window_size: number of rows per window.
        overlap_ratio: stride expressed as a fraction of ``window_size``.

    Returns:
        A list with one list of windows per input sequence.
    """
    stride = window_size * overlap_ratio
    int_stride = int(stride)
    windowed = []

    for series in X:
        n_rows = len(series)
        windows = []
        lo, hi = 0, window_size

        if n_rows % int_stride == 0:
            # Length is a multiple of the stride: every window fits exactly.
            for _ in range(n_rows // int_stride - 1):
                windows.append(series[int(lo):int(hi)])
                lo += stride
                hi += stride
        else:
            for _ in range(n_rows // int_stride + 1):
                if hi > n_rows:
                    # The next window would overrun: take the tail instead and stop.
                    windows.append(series[-window_size:])
                    break
                windows.append(series[int(lo):int(hi)])
                lo += stride
                hi += stride

        windowed.append(windows)

    return windowed

In [None]:
# Width of the one-hot label vectors: all classes, or 2 for one-vs-rest labels.
onehot_width = num_classes if bi_class == 0 else 2

if mode != 0:
    # Cut the sequences into windows (the model input shape) ...
    us_X = X_preprocess(us_X, window_size, overlap_ratio)
    korea_X = X_preprocess(korea_X, window_size, overlap_ratio)
    # ... and one-hot encode the labels.
    us_Y = np.eye(onehot_width)[us_Y]
    korea_Y = np.eye(onehot_width)[korea_Y]
else:
    X = X_preprocess(X, window_size, overlap_ratio)
    Y = np.eye(onehot_width)[Y]

In [None]:
# Subsample X Data size

def subsample(X, min_us_len, min_korea_len):
    """Down-sample every sample in ``X`` to the shorter of the two lengths.

    The target length is ``min(min_us_len, min_korea_len)``. If the samples
    already have that length, ``X`` itself is returned unchanged. Otherwise
    ``target`` elements are picked from each sample at index ``j * quotient``
    (or ``j * quotient + 1`` whenever the accumulated fractional part of
    ``longer / shorter`` has reached 1).

    Fix: every sample is now appended to the result in both size orderings;
    previously, when ``min_us_len >= min_korea_len``, only the last sample
    was returned.

    Args:
        X: sequence of equally long samples (``np.array(X)`` must be at
            least 2-D).
        min_us_len: sample length of the US set.
        min_korea_len: sample length of the Korea set.

    Returns:
        ``X`` itself when no resampling is needed, otherwise a list with one
        list of picked elements per sample.
    """
    target_len = min(min_us_len, min_korea_len)
    source_len = max(min_us_len, min_korea_len)

    if np.array(X).shape[1] == target_len:
        return X

    # Split the length ratio into its integer stride and fractional part.
    remainder, whole = np.modf(source_len / target_len)
    quotient = int(whole)

    sampled_X = []
    # The fractional accumulator is carried across samples, so the pick
    # pattern can differ from one sample to the next.
    # NOTE(review): confirm this carry-over is intended.
    addon = 0
    for sample in X:
        picked = []
        for j in range(target_len):
            if addon >= 1:
                picked.append(sample[j * quotient + 1])
                addon = remainder
            else:
                picked.append(sample[j * quotient])
                addon += remainder
        sampled_X.append(picked)

    return sampled_X

In [None]:
# Fit to minimum length: truncate every sample to the shortest window count.

# 99999999 acts as the "nothing seen yet" starting value for the minima.
min_len = min_us_len = min_korea_len = 99999999
min_X = []
min_us_X = []
min_korea_X = []

if mode == 0:
    min_len = min([min_len] + [len(x) for x in X])
    min_X = [x[:min_len] for x in X]

else:
    min_us_len = min([min_us_len] + [len(x) for x in us_X])
    min_korea_len = min([min_korea_len] + [len(x) for x in korea_X])

    if mode in (1, 2):
        if mode == 2:
            # NOTE(review): the computed Korea minimum is overridden by a
            # hard-coded 60 in this mode - confirm this is intended.
            min_korea_len = 60
        # Each region is truncated to its own minimum.
        us_cut, korea_cut = min_us_len, min_korea_len
    else:
        # Cross-region train/test: both regions share the smaller minimum.
        min_len = min(min_korea_len, min_us_len)
        us_cut = korea_cut = min_len

    min_us_X = [x[:us_cut] for x in us_X]
    min_korea_X = [x[:korea_cut] for x in korea_X]

In [None]:
# Drop duplicate: remove non-positive samples whose data is identical to a positive sample.

def _drop_positive_duplicates(samples, labels):
    """Drop non-positive samples that duplicate a positive sample.

    A sample is "positive" when column 1 of its one-hot label equals 1.

    Args:
        samples: sequence of samples, index-aligned with ``labels``.
        labels: sequence of one-hot label rows.

    Returns:
        Tuple ``(positive_idx, dropped_idx, kept_samples, kept_labels)``:
        indices of the positive samples, indices of the dropped samples (in
        the order they were found), and the surviving samples and labels.
    """
    positive_idx = [i for i in range(len(labels)) if labels[i][1] == 1]
    positive_set = set(positive_idx)      # O(1) membership tests
    dropped_idx = []
    dropped_set = set()

    for i in positive_idx:
        reference = np.array(samples[i])  # converted once per positive sample
        for j in range(len(samples)):
            # Positive samples are never dropped; already-dropped ones need no re-check.
            if j in positive_set or j in dropped_set:
                continue
            if np.array_equal(reference, np.array(samples[j])):
                dropped_idx.append(j)
                dropped_set.add(j)

    kept = [i for i in range(len(labels)) if i not in dropped_set]
    kept_samples = [samples[i] for i in kept]
    kept_labels = [labels[i] for i in kept]
    return positive_idx, dropped_idx, kept_samples, kept_labels


target_list = []
us_target_list = []
korea_target_list = []
del_list = []
us_del_list = []
korea_del_list = []

if mode == 0:
    if bi_class != 0:
        target_list, del_list, X, Target = _drop_positive_duplicates(min_X, Y)
    else:
        # Multi-class labels without a region split: nothing is dropped.
        X = min_X
        Target = Y

else:
    # NOTE(review): with bi_class == 0 this still treats column 1 of the
    # multi-class one-hot label as the "positive" class, exactly as the
    # previous code did - confirm this is intended.
    us_target_list, us_del_list, us_X, us_Target = _drop_positive_duplicates(min_us_X, us_Y)
    korea_target_list, korea_del_list, korea_X, korea_Target = _drop_positive_duplicates(min_korea_X, korea_Y)

In [None]:
# Build the train/test sets according to `mode`:
#   0 - random 80/20 split of all data
#   1 - random 80/20 split of the US data
#   2 - random 80/20 split of the Korea data
#   3 - train on US, test on Korea
#   other - train on Korea, test on US
if mode == 0:
    split = train_test_split(X, Target, test_size=0.2)

elif mode == 1:
    us_X = subsample(us_X, min_us_len, min_korea_len)
    split = train_test_split(us_X, us_Target, test_size=0.2)

elif mode == 2:
    korea_X = subsample(korea_X, min_us_len, min_korea_len)
    split = train_test_split(korea_X, korea_Target, test_size=0.2)

elif mode == 3:
    split = (subsample(us_X, min_us_len, min_korea_len),
             subsample(korea_X, min_us_len, min_korea_len),
             us_Target,
             korea_Target)

else:
    split = (subsample(korea_X, min_us_len, min_korea_len),
             subsample(us_X, min_us_len, min_korea_len),
             korea_Target,
             us_Target)

X_train, X_test, Y_train, Y_test = split

In [None]:
# Show the shape of the training inputs.
np.asarray(X_train).shape

In [None]:
# Show the shape of the test inputs.
np.asarray(X_test).shape

In [None]:
# Shape of a single training sample; used as the model input shape.
in_shape = np.asarray(X_train[0]).shape

-----------------------------------------
### End Setup, separate model sections
-----------------------------------------

### Generative Replay #1 - GAN
- https://keras.io/examples/generative/dcgan_overriding_train_step/
- https://towardsdatascience.com/writing-your-first-generative-adversarial-network-with-keras-2d16fd8d4889
- https://www.tensorflow.org/tutorials/generative/dcgan
- https://machinelearningmastery.com/how-to-develop-a-generative-adversarial-network-for-a-1-dimensional-function-from-scratch-in-keras/
- https://www.kaggle.com/function9/bidirectional-lstm-gan-music-generation
- https://wiki.pathmind.com/generative-adversarial-network-gan
- https://m.blog.naver.com/PostView.naver?isHttpsRedirect=true&blogId=chunjein&logNo=221589624838  
    
http://www.smartdesignlab.org/DL/%EC%8B%A0%EA%B8%B0%EC%88%A0/GAN_keras.html - should be consulted  
http://www.smartdesignlab.org/DL/GAN_tf2.html  
https://deep-eye.tistory.com/63  
https://velog.io/@hyebbly/Deep-Learning-Loss-%EC%A0%95%EB%A6%AC-1-GAN-loss

https://machinelearningmastery.com/semi-supervised-generative-adversarial-network/  


In [None]:
### Data Setup ###

# Rebalance the training data with SMOTE. SMOTE works on 2-D inputs, so each
# 4-D sample is flattened first and restored to its original shape afterwards.
sm = imblearn.over_sampling.SMOTE()         # random_state is not set, so resampling differs between runs

if bi_class == 0:
    X_shape = np.array(X_train).shape
    Y_shape = np.array(Y_train).shape
    flat_width = X_shape[1]*X_shape[2]*X_shape[3]
    new_X_train = np.array(X_train).reshape(X_shape[0], flat_width)
    Y_train = np.array(Y_train).astype('float64')
    X_train, Y_train = sm.fit_resample(new_X_train, Y_train)
    temp = X_train.shape
    X_train = X_train.reshape((temp[0],) + tuple(X_shape[1:4]))
    Y_train = Y_train.reshape(temp[0], Y_shape[1])

else:
    origin_shape = np.array(X_train).shape
    flat_width = origin_shape[1]*origin_shape[2]*origin_shape[3]
    new_X_train = np.array(X_train).reshape(origin_shape[0], flat_width)
    Y_train = np.array(Y_train).astype('float64')
    X_train, Y_train = sm.fit_resample(new_X_train, Y_train)
    temp = X_train.shape
    X_train = X_train.reshape((temp[0],) + tuple(origin_shape[1:4]))
    # The resampled labels are flattened to one class id per sample and
    # one-hot encoded again with two columns.
    Y_train = np.eye(2)[Y_train.reshape(temp[0])]

In [None]:
# custom activation function
def custom_activation(output):
    """Map class logits to a single real/fake probability.

    Computes ``Z / (Z + 1)`` with ``Z = sum(exp(output))`` over the last axis.
    It is evaluated as ``sigmoid(logsumexp(output))``, which is the same
    quantity but does not overflow to ``inf / inf`` (NaN) for large logits.

    Args:
        output: tensor of logits; the reduction runs over its last axis.

    Returns:
        Tensor with the last axis reduced to size 1, values in (0, 1).
    """
    log_z = tf.reduce_logsumexp(output, axis=-1, keepdims=True)
    return tf.math.sigmoid(log_z)

In [None]:
# Show the first dimension of a single training sample.
in_shape[0]

In [None]:
 # define the standalone supervised and unsupervised discriminator models
def define_discriminator(in_shape=in_shape, RNN_unit=200, bi_class=bi_class, n_classes=num_classes):
    """Build the supervised classifier and the unsupervised discriminator.

    Both models share every layer up to the final logits; they differ only in
    the head applied to those logits (softmax for the classifier,
    ``custom_activation`` for the real/fake discriminator).

    Args:
        in_shape: shape of one input sample.
        RNN_unit: LSTM width, also used as the first dimension of the reshape
            in front of the LSTM (the flattened feature map must be divisible
            by it).
        bi_class: 0 builds an ``n_classes``-way classifier, anything else a
            2-way classifier.
        n_classes: number of classes used when ``bi_class == 0``.

    Returns:
        ``(d_model, c_model)``: compiled discriminator and compiled classifier.
    """
    in_image = Input(shape=in_shape)

    # Shared feature extractor: 1x1 convolution, then an LSTM over the reshaped map.
    disc = Conv2D(31, 1, padding='same', name='disc_conv1')(in_image)
    disc = BatchNormalization(momentum=0.8, name='disc_BM1')(disc)
    disc = layers.LeakyReLU(alpha=0.2, name='disc_activ1')(disc)
    disc = Dropout(0.2)(disc)
    disc = Reshape((RNN_unit, -1), name='disc_reshape1')(disc)
    disc = LSTM(units=RNN_unit, activation='tanh', recurrent_activation='sigmoid', return_sequences=False, name='disc_rnn1')(disc)

    fe = Dense(32)(disc)
    # Logits: one per class, or two for one-vs-rest labels.
    n_outputs = n_classes if bi_class == 0 else 2
    fe = Dense(n_outputs)(fe)

    # Supervised head: class probabilities, trained on integer class ids.
    c_out_layer = Activation('softmax')(fe)
    c_model = Model(in_image, c_out_layer)
    c_model.summary()
    # `learning_rate` is the documented argument name; `lr` is deprecated.
    c_model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5), metrics=['accuracy'])

    # Unsupervised head: a single real/fake probability derived from the same logits.
    d_out_layer = Lambda(custom_activation)(fe)
    d_model = Model(in_image, d_out_layer)
    d_model.summary()
    d_model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

    return d_model, c_model

In [None]:
# define the standalone generator model
def define_generator(input_shape, reshape=in_shape , feature_num=feature_num):
    """Build the generator that maps a latent vector to a synthetic sample.

    The output has shape ``(reshape[0], reshape[1], feature_num)``. The first
    two dimensions are now taken from ``reshape`` instead of the previously
    hard-coded 276 and 50, so the generator keeps matching the discriminator
    input when the data shape changes. For ``reshape[:2] == (276, 50)`` the
    model is unchanged.

    Args:
        input_shape: size of the latent vector.
        reshape: shape of one real sample; only its first two entries are used.
        feature_num: number of feature channels to generate.

    Returns:
        The (uncompiled) generator model.
    """
    n_windows, n_steps = int(reshape[0]), int(reshape[1])

    in_lat = Input(shape=(input_shape,))
    # Project the latent vector, then repeat it once per window position.
    gen = Dense(feature_num, use_bias=False, name='gen_Dense1')(in_lat)
    gen = RepeatVector(n_windows)(gen)
    gen = layers.LeakyReLU(alpha=0.2, name='gen_activ1')(gen)
    # One LSTM unit per time step inside a window -> (n_windows, n_steps).
    gen = LSTM(units=n_steps, activation='tanh', recurrent_activation='sigmoid',return_sequences=True, name='gen_rnn1')(gen)
    # Add a channel axis so the transposed convolution can expand it to feature_num channels.
    gen = tf.expand_dims(gen, -1)
    gen_output = Conv2DTranspose(feature_num, 64, padding="same", activation='relu', name='gen_conv1')(gen)

    model = Model(inputs=in_lat, outputs=gen_output, name='Generator')
    model.summary()
    return model

In [None]:
# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
    """Stack the generator and the discriminator into the generator-training model.

    The discriminator is marked non-trainable before the combined model is
    compiled, so training the combined model is meant to update only the
    generator.

    Args:
        g_model: generator model (latent vector -> sample).
        d_model: unsupervised discriminator model (sample -> real/fake score).

    Returns:
        The compiled combined model (latent vector -> real/fake score).
    """
    d_model.trainable = False
    # Feed the generator output straight into the discriminator.
    gan_output = d_model(g_model.output)
    model = Model(g_model.input, gan_output)
    # `learning_rate` is the documented argument name; `lr` is deprecated.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model

In [None]:
# load the images
def load_real_samples(X, Y):
    """Pair the inputs with integer class ids.

    Args:
        X: input samples, returned unchanged.
        Y: per-sample label rows; the index of the largest entry along axis 1
            is used as the class id.

    Returns:
        ``[X, class_ids]``.
    """
    class_ids = tf.argmax(Y, axis=1)
    return [X, class_ids]

In [None]:
# select a supervised subset of the dataset, ensures classes are balanced
def select_supervised_samples(dataset, n_samples=X_train.shape[0], bi_class=bi_class, n_classes=num_classes):
    """Draw a class-balanced subset (sampling with replacement).

    Args:
        dataset: ``(X, y)`` pair of samples and integer class ids.
        n_samples: total number of samples to draw.
        bi_class: any non-zero value forces two classes.
        n_classes: number of classes used when ``bi_class == 0``.

    Returns:
        ``(X_subset, y_subset)`` as arrays, ``int(n_samples / n_classes)``
        entries per class, grouped by class id.
    """
    X, y = dataset
    if bi_class != 0:
        n_classes = 2
    n_per_class = int(n_samples / n_classes)

    X_list, y_list = [], []
    for class_id in range(n_classes):
        # All samples of this class ...
        X_with_class = X[y == class_id]
        # ... from which random positions are drawn (repeats allowed).
        ix = np.random.randint(0, len(X_with_class), n_per_class)
        X_list.extend(X_with_class[j] for j in ix)
        y_list.extend(class_id for _ in ix)
    return np.asarray(X_list), np.asarray(y_list)

In [None]:
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
    """Draw standard-normal latent vectors for the generator.

    Args:
        latent_dim: size of each latent vector.
        n_samples: number of vectors to draw.

    Returns:
        Array of shape ``(n_samples, latent_dim)``.
    """
    flat_noise = np.random.randn(latent_dim * n_samples)
    return flat_noise.reshape(n_samples, latent_dim)

In [None]:
# select real samples
def generate_real_samples(dataset, n_samples):
    """Attach "real" targets (all ones) to a batch of real samples.

    The whole batch is passed through unchanged; no random selection is done.

    Args:
        dataset: ``(images, labels)`` pair.
        n_samples: number of target rows to create.

    Returns:
        ``([images, labels], y)`` where ``y`` is an ``(n_samples, 1)`` array of ones.
    """
    images, labels = dataset
    real_targets = np.ones((n_samples, 1))
    return [images, labels], real_targets

In [None]:
# use the generator to generate n fake examples, with class labels
def generate_fake_samples(generator, latent_dim, n_samples):
    """Generate synthetic samples together with "fake" targets (all zeros).

    Args:
        generator: model whose ``predict`` maps latent vectors to samples.
        latent_dim: size of each latent vector.
        n_samples: number of samples to generate.

    Returns:
        ``(images, y)`` where ``y`` is an ``(n_samples, 1)`` array of zeros.
    """
    latent_batch = generate_latent_points(latent_dim, n_samples)
    generated = generator.predict(latent_batch)
    fake_targets = np.zeros((n_samples, 1))
    return generated, fake_targets

In [None]:
# generate samples and save as a plot and save the model
def summarize_performance(step, g_model, c_model, latent_dim, dataset, n_samples=32):
    """Print the classifier accuracy on ``dataset``.

    Only ``c_model`` and ``dataset`` are used; the remaining parameters are
    accepted but ignored.

    Args:
        c_model: compiled classifier whose ``evaluate`` returns ``(loss, accuracy)``.
        dataset: ``(X, y)`` pair to evaluate on.
    """
    features, labels = dataset
    _, acc = c_model.evaluate(features, labels, verbose=0)
    accuracy_pct = acc * 100
    print('Classifier Accuracy: %.3f%%' % accuracy_pct)

In [None]:
# train the generator and discriminator
def train(g_model, d_model, c_model, gan_model, X, y, latent_dim, n_epochs=50, bi_class=bi_class, n_batch=32):
    """Train the classifier, discriminator and generator together.

    For every batch of real data the supervised classifier is updated first,
    then the discriminator on the real batch and on a generated batch, and
    finally the generator through the combined GAN model. After each epoch
    the classifier is evaluated on the full ``(X, y)`` data.

    Changes from the previous version (identical behaviour for the default
    ``n_batch=32`` when ``X`` is the training set):
      * the shuffle buffer is sized from the ``X`` argument instead of the
        global ``X_train``;
      * real batches hold ``n_batch / 2`` samples instead of a hard-coded 16,
        so real and generated batches stay the same size.

    Args:
        g_model: generator model.
        d_model: unsupervised discriminator model.
        c_model: supervised classifier model (compiled with an accuracy metric).
        gan_model: combined generator + frozen discriminator model.
        X: training inputs.
        y: integer class ids aligned with ``X``.
        latent_dim: size of the generator's latent vector.
        n_epochs: number of passes over the data.
        bi_class: kept for interface compatibility; training is the same for
            binary and multi-class labels.
        n_batch: size of the generator update batch; half of it is used for
            the real and the generated discriminator batches.

    Returns:
        ``(loss_hist, acc_hist)``: per-epoch classifier loss and accuracy on ``(X, y)``.
    """
    loss_hist = []
    acc_hist = []

    # Half a batch of real data is paired with half a batch of generated data.
    half_batch = int(n_batch / 2)
    train_dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(len(X)).batch(half_batch)
    bat_per_epo = len(train_dataset)
    n_steps = bat_per_epo * n_epochs
    print('n_epochs=%d, n_batch=%d, 1/2=%d, b/e=%d, steps=%d' % (n_epochs, n_batch, half_batch, bat_per_epo, n_steps))

    for i in range(n_epochs):
        start = time.time()
        steps = 0  # step counter restarts every epoch
        for Xsup_real, ysup_real in train_dataset:
            # Update the supervised classifier (c).
            c_loss, c_acc = c_model.train_on_batch(Xsup_real, ysup_real)
            # Update the unsupervised discriminator (d) on real, then on generated data.
            [X_real, _], y_real = generate_real_samples([Xsup_real, ysup_real], len(ysup_real))
            d_loss1 = d_model.train_on_batch(X_real, y_real)
            X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
            d_loss2 = d_model.train_on_batch(X_fake, y_fake)
            # Update the generator (g): it is rewarded when its samples are scored as real.
            X_gan, y_gan = generate_latent_points(latent_dim, n_batch), np.ones((n_batch, 1))
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            steps += 1
            # Summarize the losses on this batch.
            print('>%d, c[%.3f,%.0f], d[%.3f,%.3f], g[%.3f]' % (steps, c_loss, c_acc*100, d_loss1, d_loss2, g_loss))

        # End of epoch: evaluate the classifier on the full data and report the runtime.
        loss, acc = c_model.evaluate(X, y, verbose=0)
        print('%d epoch Classifier Loss: %.3f Accuracy: %.3f%%' % (i+1, loss, acc * 100))
        print ('Runtime: {} sec'.format(time.time()-start))
        loss_hist.append(loss)
        acc_hist.append(acc)

    return loss_hist, acc_hist

In [None]:
# Shuffled, batched (16 per batch) dataset of training inputs and integer class ids.
# The transposes use identity permutations, so they leave the arrays unchanged.
train_features = np.array(X_train).transpose([0,1,2,3])
train_class_ids = tf.argmax(np.array(Y_train).transpose([0,1]), axis=1)
train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_class_ids)).shuffle(len(X_train)).batch(16)

In [None]:
%%time
# Size of the latent space.
latent_dim = 31

# Build the models: discriminator/classifier pair, generator, combined GAN.
d_model, c_model = define_discriminator()
g_model = define_generator(latent_dim)
gan_model = define_gan(g_model, d_model)

# Training arrays (the transposes use identity permutations and change nothing).
train_inputs = np.array(X_train).transpose([0,1,2,3])
train_onehot = np.array(Y_train).transpose([0,1])

# Inputs paired with integer class ids.
dataset = load_real_samples(X=train_inputs, Y=train_onehot)

# Train all models; keep the per-epoch classifier loss and accuracy.
loss, acc = train(g_model, d_model, c_model, gan_model, train_inputs, tf.argmax(train_onehot, axis=1), latent_dim)

In [None]:
# Classifier probabilities on the training set, scored with ROC AUC against the one-hot labels.
train_inputs_for_auc = np.array(X_train).transpose([0,1,2,3])
predictions = c_model.predict(train_inputs_for_auc)

if bi_class != 0:
    auc = roc_auc_score(Y_train, predictions)
    print('Train AUC: ', auc)
else:
    auc = roc_auc_score(Y_train, predictions, multi_class='raise')
    print('Multiclass Train AUC: ', auc)

In [None]:
# Macro-averaged F1 on the training set (multi-class runs only).
if bi_class == 0:
    true_class_ids = np.argmax(Y_train, axis=1)
    predicted_class_ids = tf.argmax(predictions,axis=1)
    f1 = f1_score(true_class_ids, predicted_class_ids, average='macro')
    print('Multiclass Train F1: ', f1)

In [None]:
# Display acc, loss: per-epoch training loss (left axis) and accuracy (right axis).

fig, loss_ax = plt.subplots()
acc_ax = loss_ax.twinx()          # second y-axis sharing the epoch axis

# Loss curve and its axis decoration.
loss_ax.plot(loss, 'y', label='train loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(loc='upper left')

# Accuracy curve.
acc_ax.plot(acc, 'b', label='train acc')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Integer class ids of the test labels (index of the largest entry per row).
test_onehot = np.array(Y_test).transpose([0,1])
y_test = tf.argmax(test_onehot, axis=1)

In [None]:
# Evaluate the classifier on the test set and report its accuracy.
test_inputs = np.array(X_test).transpose([0,1,2,3])
_, test_acc = c_model.evaluate(test_inputs, y_test, verbose=0)
test_acc_pct = test_acc * 100
print('Test Accuracy: %.3f%%' % test_acc_pct)

In [None]:
# Classifier probabilities on the test set, scored with ROC AUC against the one-hot labels.
predictions = c_model.predict(np.array(X_test).transpose([0,1,2,3]))

# The messages now say "Test": these scores are computed on the test set,
# but were previously printed with a "Train" label.
if bi_class==0:
    auc = roc_auc_score(Y_test, predictions, multi_class='raise')
    print('Multiclass Test AUC: ', auc)
else:
    auc = roc_auc_score(Y_test, predictions)
    print('Test AUC: ', auc)

In [None]:
# Macro-averaged F1 on the test set (multi-class runs only).
# The message now says "Test": it was previously printed with a "Train" label.
if bi_class==0:
    f1 = f1_score(np.argmax(Y_test, axis=1), tf.argmax(predictions,axis=1), average='macro')
    print('Multiclass Test F1: ', f1)