# DCGAN for Handling Class Imbalance

### Import Libraries 

In [1]:
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D, Conv2DTranspose
from keras.models import Sequential, Model
from keras.optimizers import Adam,SGD,RMSprop
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import sys
import numpy as np
import tensorflow as tf

### Initialization  

In [8]:
# Mount Google Drive at /content/drive; the label CSV, the input images and the
# generated-image output folder used later in this notebook all live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Fail fast when TensorFlow does not report a GPU under the expected device
# name, so the rest of the notebook is not run on a CPU-only runtime by accident.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')

In [None]:
# Show the device name returned by the GPU check as the cell output.
device_name

'/device:GPU:0'

In [None]:
# List every local device TensorFlow can see, together with its type and memory limit.
# NOTE(review): `tensorflow.python.client` is an internal module path — confirm it is
# still importable if the TensorFlow version changes.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 11924215602650111082, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14674281152
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 8586709905423516146
 physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"]

In [13]:
# Image geometry: 128 x 128 pixels, one channel.
img_rows, img_cols, channels = 128, 128, 1
img_shape = (img_rows, img_cols, channels)

# Length of the noise vector fed to the generator.
latent_dim = 100

# Optimizer settings. A single RMSprop instance is created here and passed to
# every compile() call in this notebook.
# Previously tried configurations (all disabled, kept for reference):
#   SGD(0.0001, 0.9), SGD(0.00007, 0.9), SGD(0.0002, 0.5), SGD(0.00009, 0.9),
#   SGD(0.00001, 0.9), SGD(0.00005, 0.9), Adam(0.0001, 0.8)
lr = 1e-4
decay = 6e-8
optimizer = RMSprop(lr=lr, decay=decay)

### Generator and Discriminator 

In [14]:
def GAN_generator(latent_dim1):
    """Build the DCGAN generator: noise vector in, single-channel image out.

    Architecture:
      * a Dense layer projects the noise to 4 * 4 * 1024 values, reshaped to
        (4, 4, 1024);
      * five stages of UpSampling2D -> Conv2DTranspose -> BatchNormalization ->
        ReLU with 512, 256, 128, 64 and 128 filters (4x4 doubled five times,
        assuming UpSampling2D's default factor of 2, gives 128x128);
      * a final one-filter Conv2DTranspose followed by tanh.

    Args:
        latent_dim1: length of the input noise vector.

    Returns:
        A Keras `Model` mapping a `(latent_dim1,)` noise input to the image
        produced by the Sequential stack. The stack's summary is printed as a
        side effect.
    """
    model = Sequential()
    model.add(Dense(1024 * 4 * 4, activation="relu", input_dim=latent_dim1))
    model.add(Reshape((4, 4, 1024)))

    # Each stage upsamples the feature map and then refines it with a dilated
    # transposed convolution (stride 1, so only UpSampling2D changes the size).
    for filters in (512, 256, 128, 64, 128):
        model.add(UpSampling2D())
        model.add(Conv2DTranspose(filters, kernel_size=3, strides=(1, 1), dilation_rate=2, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))

    # Collapse to one output channel; tanh bounds pixel values to [-1, 1].
    model.add(Conv2DTranspose(1, kernel_size=3, strides=(1, 1), dilation_rate=2, padding="same"))
    model.add(Activation("tanh"))

    model.summary()
    # Use the function argument here, not the module-level `latent_dim`, so the
    # Input width always matches the Dense layer's `input_dim` above.
    noise = Input(shape=(latent_dim1,))
    img = model(noise)
    return Model(noise, img)

In [15]:
def GAN_discriminator(img_shape1):
    """Build the DCGAN discriminator: image in, single sigmoid score out.

    The network is five stride-2 3x3 convolutions (32, 64, 128, 256, 256
    filters), each followed by LeakyReLU(0.2) and Dropout(0.25). Every
    convolution except the first is also followed by BatchNormalization.
    The result is flattened and fed to a one-unit sigmoid Dense layer.

    Args:
        img_shape1: shape tuple of one input image, passed both as the first
            layer's `input_shape` and as the shape of the model's `Input`.

    Returns:
        A Keras `Model` mapping an image input to the Sequential stack's
        output. The stack's summary is printed as a side effect.
    """
    net = Sequential()

    # First stage: the only one that declares the input shape and has no
    # batch normalisation.
    net.add(Conv2D(32, kernel_size=3, strides=(2, 2), input_shape=img_shape1, padding="same"))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dropout(0.25))

    # Remaining stages share one layout; only the filter count changes.
    for n_filters in (64, 128, 256, 256):
        stage = (
            Conv2D(n_filters, kernel_size=3, strides=(2, 2), padding="same"),
            BatchNormalization(),
            LeakyReLU(alpha=0.2),
            Dropout(0.25),
        )
        for layer in stage:
            net.add(layer)

    # Classification head.
    net.add(Flatten())
    net.add(Dense(1, activation='sigmoid'))

    net.summary()
    image_in = Input(shape=img_shape1)
    score = net(image_in)
    return Model(image_in, score)

### Building and Compiling the DCGAN Models

In [16]:
# Build the discriminator and compile it on its own; this compiled model is the
# one trained directly on real and generated batches in the training loop.
discriminator = GAN_discriminator(img_shape) 
discriminator.compile(loss='binary_crossentropy',
                      optimizer=optimizer, metrics=['accuracy'])
# Build the generator. It is not compiled separately in this cell; it is only
# trained through the combined `dcgan` model below.
generator = GAN_generator(latent_dim) 
# Symbolic noise input and the image the generator produces from it.
z = Input(shape=(latent_dim,))
img = generator(z)

# Statement order matters: the flag is flipped AFTER the discriminator's own
# compile() above and BEFORE the combined model is compiled below.
# NOTE(review): this is the usual Keras GAN idiom and relies on `trainable`
# being read at compile() time — confirm for the installed Keras version.
discriminator.trainable = False
valid = discriminator(img)

# Combined model: noise -> generator -> discriminator score. Training it with
# "real" targets is how the generator is updated.
dcgan = Model(z, valid)
dcgan.compile(loss='binary_crossentropy', optimizer=optimizer)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 64, 64, 32)        320       
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 64, 64, 32)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
batch_normalization_9 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 32, 32, 64)        0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 32, 32, 64)       

In [None]:
class PlotLosses(tf.keras.callbacks.Callback):
    """Keras callback that redraws a loss / val_loss curve after every epoch.

    The callback only has an effect when passed to a Keras training call that
    invokes callbacks; no other code visible in this notebook references it.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history and open a fresh figure."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's 'loss' / 'val_loss' and redraw both curves.

        Args:
            epoch: epoch index supplied by Keras (unused; an internal counter
                is kept instead).
            logs: metrics dict supplied by Keras; missing keys are recorded as
                None.
        """
        # `clear_output` was never imported in the notebook, so the original
        # raised NameError here. Import it locally and degrade to "no clearing"
        # when IPython is not available.
        try:
            from IPython.display import clear_output
        except ImportError:
            clear_output = None

        # Avoid a shared mutable default argument.
        logs = logs or {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        if clear_output is not None:
            clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.legend()
        plt.show()

plot_losses = PlotLosses()

### Load NIH Chest X-ray Images and Define the Training Loop

In [17]:
def _load_class_images(train_path, image_path, class_name, img_x, img_y):
    """Load, resize and normalise every image labelled exactly `class_name`.

    Rows are selected where the "Finding Labels" column equals `class_name`
    exactly, so rows whose label string contains anything else are excluded.
    Files that OpenCV cannot read are skipped with a message.

    Returns:
        float32 array of shape (n_images, img_y, img_x, 1).

    Raises:
        ValueError: if no image could be loaded for `class_name`.
    """
    import os

    labels = pd.read_csv(train_path)
    selected = labels[labels["Finding Labels"] == class_name]

    images = []
    for file_name in selected["Image Index"]:
        full_path = os.path.join(image_path, file_name)
        raw = cv2.imread(full_path)
        if raw is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None; indexing it would otherwise raise a confusing TypeError.
            print("skipping unreadable image: {}".format(full_path))
            continue
        # Keep only the first channel of the decoded image.
        single_channel = raw[:, :, 0]
        arr = cv2.resize(single_channel, (img_x, img_y)).astype('float32')
        arr /= 255.0
        # Per-image mean centring.
        # NOTE(review): the generator ends in tanh ([-1, 1]); confirm that
        # [0, 1]-scaled, mean-centred targets are the intended value range.
        arr = arr - np.mean(arr)
        images.append(arr)

    print("shape of x train: {}".format(len(images)))
    if not images:
        raise ValueError(
            "no readable images found for class {!r} (labels: {}, images: {})".format(
                class_name, train_path, image_path))

    return np.asarray(images).reshape(len(images), img_y, img_x, 1)


def nih_xrays(epochs, batch_size,
              class_name='Cardiomegaly',
              train_path="/content/drive/My Drive/sample_labels.csv",
              image_path="/content/drive/My Drive/images/"):
    """Train the module-level DCGAN on NIH chest X-rays of one class.

    Uses the module-level `generator`, `discriminator`, `dcgan` and
    `latent_dim`. Each iteration draws one random batch (with replacement),
    trains the discriminator on real then generated images, trains the
    generator through the combined model, and prints the losses.

    Args:
        epochs: number of training iterations; each one processes a single
            batch, not a full pass over the data.
        batch_size: number of real and of generated images per iteration.
        class_name: value of "Finding Labels" to train on (exact match).
        train_path: CSV file with "Image Index" and "Finding Labels" columns.
        image_path: directory containing the image files.

    Raises:
        ValueError: if no image for `class_name` could be loaded.
    """
    img_x, img_y = 128, 128
    x_train = _load_class_images(train_path, image_path, class_name, img_x, img_y)

    # Target labels: 1 for real images, 0 for generated ones.
    valid1 = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        idx = np.random.randint(0, x_train.shape[0], batch_size)
        imgs = x_train[idx]

        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        gen_imgs = generator.predict(noise)

        # Train the discriminator
        d_loss_real = discriminator.train_on_batch(imgs, valid1)
        d_loss_fake = discriminator.train_on_batch(gen_imgs, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        # Train the generator: it is rewarded when the discriminator scores
        # its images as real.
        g_loss = dcgan.train_on_batch(noise, valid1)

        # Plot the progress
        print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss))

### Train the Generator and Discriminator, then Save Generated Images

In [18]:
with tf.device('/gpu:0'):
  if __name__ == '__main__':
    import os
    from sklearn import preprocessing

    # Train, then persist both networks.
    nih_xrays(epochs=100, batch_size=16)
    generator.save('GAN_gen.h5')
    discriminator.save('GAN_dis.h5')

    # Encode class names as integers; the integer becomes part of each file name.
    lb = preprocessing.LabelEncoder()  # Binarizer()

    # Other class lists that were tried:
    #9classes = ['Atelectasis', 'No Finding', 'Cardiomegaly', 'Effusion', 'Pneumothorax']
    #classes =['Cardiomegaly','Emphysema','Effusion','Hernia','Nodule','Pneumothorax','Atelectasis','Pleural_Thickening','Mass','Edema','Consolidation','Infiltration','Fibrosis','Pneumonia']
    classes =['Cardiomegaly']
    OHE_labels = lb.fit_transform(classes)

    # savefig does not create missing folders, so make sure the target exists.
    out_dir = "/content/drive/My Drive/GAN/Cardiomegaly/"
    os.makedirs(out_dir, exist_ok=True)

    # Number of synthetic images written per class.
    images_per_class = 100

    fig, ax = plt.subplots()
    for label in OHE_labels:
        # Generate exactly the images that get saved, in one predict() call.
        # The original drew 16 images per iteration and used only the first.
        noise1 = np.random.normal(0, 1, (images_per_class, latent_dim))
        gen_batch = generator.predict(noise1)
        for num in range(images_per_class):
            ax.imshow(gen_batch[num, :, :, 0], cmap='gray')
            # NOTE(review): savefig stores the whole figure (axes and margins),
            # not the raw 128x128 array — confirm that is what downstream use expects.
            fig.savefig(out_dir + "ClassCardio" + str(label) + "-" + str(num) + ".png")
            ax.clear()
    # Close the figure so the cell does not end by displaying an empty one.
    plt.close(fig)


shape of x train: 50
0 [D loss: 1.475846, acc.: 12.50%] [G loss: 0.683983]
1 [D loss: 0.834442, acc.: 53.12%] [G loss: 0.673194]
2 [D loss: 0.394348, acc.: 84.38%] [G loss: 0.673075]
3 [D loss: 0.364382, acc.: 87.50%] [G loss: 0.669769]
4 [D loss: 0.267758, acc.: 96.88%] [G loss: 0.659262]
5 [D loss: 0.261176, acc.: 93.75%] [G loss: 0.660052]
6 [D loss: 0.158004, acc.: 93.75%] [G loss: 0.652004]
7 [D loss: 0.123091, acc.: 100.00%] [G loss: 0.659834]
8 [D loss: 0.129898, acc.: 100.00%] [G loss: 0.648985]
9 [D loss: 0.135482, acc.: 96.88%] [G loss: 0.639365]
10 [D loss: 0.135018, acc.: 100.00%] [G loss: 0.649477]
11 [D loss: 0.373975, acc.: 90.62%] [G loss: 0.659248]
12 [D loss: 0.498725, acc.: 84.38%] [G loss: 0.674089]
13 [D loss: 0.298307, acc.: 87.50%] [G loss: 0.682181]
14 [D loss: 0.585962, acc.: 75.00%] [G loss: 0.689590]
15 [D loss: 0.710827, acc.: 46.88%] [G loss: 0.674973]
16 [D loss: 0.713019, acc.: 53.12%] [G loss: 0.691309]
17 [D loss: 0.864733, acc.: 59.38%] [G loss: 0.6738

<Figure size 432x288 with 0 Axes>