In [3]:
from random import shuffle
import tensorflow as tf 
import tensorflow.keras as keras
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential
from keras.layers import Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization, Dense
from keras.preprocessing.image import ImageDataGenerator
from sklearn import preprocessing
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import time
import sys
from tensorflow.python.keras.engine.data_adapter import train_validation_split

from tensorflow.python.keras.engine.sequential import relax_input_shape

In [4]:
# Locations of the image folders and of the metadata CSV files.
train_image_path = './images/images/train/'
test_image_path = './images/images/test/'

train_csv = './assignment5_training_data_metadata.csv'
test_csv = './assignment5_test_data_metadata.csv'

# Integer code assigned to each raw `type` value.
# NOTE(review): 'Stress-Smoking' shares code 2 with 'Virus', so only three
# distinct codes are produced. An earlier comment listed it as a fourth class;
# confirm the merge is intentional.
TYPE_CODES = {'Normal': 1, 'Virus': 2, 'bacteria': 3, 'Stress-Smoking': 2}

# Read the metadata and drop the `id` column without mutating in place, so the
# cell can be re-run safely.
df = pd.read_csv(train_csv).drop(columns='id')
df_test = pd.read_csv(test_csv).drop(columns='id')

# Rows without a `type` are treated as 'Normal'.
raw_types = df['type'].fillna('Normal')

# Fail early on a label that has no code; a leftover string would otherwise
# end up mixed with integers in the label column.
unknown_types = raw_types[~raw_types.isin(list(TYPE_CODES))].unique().tolist()
if unknown_types:
    raise ValueError('Unmapped values in column "type": {}'.format(unknown_types))

# Assign the encoded column back explicitly instead of calling
# fillna/replace(inplace=True) on `df['type']`: chained in-place calls do not
# write back to `df` when pandas Copy-on-Write is active.
df['type'] = raw_types.map(TYPE_CODES)

# Class distribution, printed with plain Python ints.
unique, counts = np.unique(df['type'].to_numpy(), return_counts=True)
print(dict(zip(unique.tolist(), counts.tolist())))

{1: 1342, 2: 1409, 3: 2535}


In [5]:
# Mini-batch size and image dimensions (pixels) referenced by later cells.
batch_size = 16
img_height = 500
img_width = 500

# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# only load pickle files that come from a trusted source.
# The `with` blocks close the file handles even if loading raises.
with open('./image_list_train.pickle', 'rb') as infile:
    image_list_train = pickle.load(infile)

with open('./image_list_test.pickle', 'rb') as infile:
    image_list_test = pickle.load(infile)


In [6]:
def _sorted_without_key(records):
    """Sort `records` by their element at index 1 and return copies without it.

    The input records are left untouched (the previous `del record[1]` mutated
    the objects stored in the source list), so this cell can be re-run without
    changing which field the sort key reads.
    """
    ordered = sorted(records, key=lambda record: record[1])
    return [record[:1] + record[2:] for record in ordered]


# NOTE(review): index 1 of each record is presumably the image file name, since
# the labels are sorted by `image_name` just below; confirm against the code
# that wrote the pickles.
sorted_train_images = _sorted_without_key(image_list_train)
sorted_test_images = _sorted_without_key(image_list_test)

# Metadata rows ordered by image name.
sorted_train_labels = df.sort_values(['image_name'])

# Single-channel image tensors shaped (samples, height, width, 1).
X_train = np.array(sorted_train_images)
X_train = X_train.reshape(-1, img_height, img_width, 1)
X_test = np.array(sorted_test_images)
X_test = X_test.reshape(-1, img_height, img_width, 1)

In [7]:
import random

SHUFFLE_SEED = 42  # fixed seed so the shuffled order is reproducible

# Take the labels from the frame sorted by `image_name`. The images in X_train
# were sorted as well, so reading the unsorted `df` here would pair images
# with the wrong labels.
class_codes = sorted_train_labels['type'].to_numpy().astype(int)
assert len(class_codes) == len(X_train), 'number of images and labels differ'

# One-hot encode the labels: the models in this notebook are compiled with
# `categorical_crossentropy` and a softmax output, which need one target
# column per class. `return_inverse` maps the codes to 0..n_classes-1.
classes, class_index = np.unique(class_codes, return_inverse=True)
one_hot = np.eye(len(classes), dtype=np.float32)[class_index]

# Shuffle images and labels with one shared permutation so pairs stay aligned.
# NOTE: X_train is reassigned here, so re-run the cell that builds X_train
# before re-running this one; otherwise the two fall out of alignment.
order = list(range(len(X_train)))
random.Random(SHUFFLE_SEED).shuffle(order)
X_train = X_train[order]
labels = one_hot[order]

In [8]:
# Fraction of X_train held out for validation; both generators below must use
# the same value so their subsets do not overlap.
VALIDATION_SPLIT = 0.3

# Training generator: rescales pixels to [0, 1] and applies random
# shear / zoom / horizontal-flip augmentation.
image_gen = ImageDataGenerator(
    rescale=1/255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT
)

# Validation generator: rescaling only. Drawing the validation subset from the
# augmenting generator would distort the validation images and therefore the
# `val_loss` that callbacks monitor.
val_data_gen = ImageDataGenerator(
    rescale=1/255,
    validation_split=VALIDATION_SPLIT
)

# Test generator: rescaling only.
test_data_gen = ImageDataGenerator(rescale = 1/255)

train = image_gen.flow(
    x=X_train,
    y=labels,
    batch_size=batch_size,
    subset='training',
    shuffle=True
)

# NOTE(review): relies on `flow` splitting the array by position, so the
# 'validation' subset here is the complement of the 'training' subset above;
# confirm against the installed Keras version.
val = val_data_gen.flow(
    x=X_train,
    y=labels,
    batch_size=batch_size,
    subset='validation',
    shuffle=False  # batch order does not affect validation metrics
)

# Keep the test order fixed so predictions line up with X_test rows.
test = test_data_gen.flow(
      x=X_test,
      shuffle=False,
      batch_size=batch_size
)


In [9]:
# Baseline CNN: one conv block followed directly by the softmax classifier.
NAME = 'testing_{}'.format(time.time())
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

model = Sequential()

# Keras image inputs are ordered (height, width, channels).
model.add(Conv2D(32, (3,3), activation='relu', input_shape=(img_height, img_width, 1)))
model.add(MaxPool2D((2,2)))

model.add(Flatten())

# Three output units, one per class.
# NOTE(review): `categorical_crossentropy` expects one-hot targets with three
# columns; confirm the labels fed to the generators are encoded that way.
model.add(Dense(3, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

# Stop once val_loss has not improved for 3 epochs.
early = EarlyStopping(monitor='val_loss', mode='min', patience=3)

# Multiply the learning rate by 0.3 after 2 epochs without val_loss improvement.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.3, min_lr=0.000001)

callbacks_list = [ early, learning_rate_reduction ]

# Pass one flat list of callbacks rather than a list nested inside a list.
model.fit(train, epochs=15, validation_data=val, callbacks=[*callbacks_list, tensorboard])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 498, 498, 32)      320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 249, 249, 32)      0         
_________________________________________________________________
flatten (Flatten)            (None, 1984032)           0         
_________________________________________________________________
dense (Dense)                (None, 3)                 5952099   
Total params: 5,952,419
Trainable params: 5,952,419
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15

KeyboardInterrupt: 

In [51]:
# Architecture grid search: one model is trained per combination of the
# values below, each logging to its own TensorBoard run directory.
dense_layers = [1, 2]
layer_sizes = [64, 128, 256, 512]
conv_layers = [3]

# Derive the input shape from the data being fitted instead of hard-coding
# (250, 250, 1), so the model always matches X_train.
input_shape = tuple(np.shape(X_train)[1:])

# NOTE(review): `y_train` is not defined in the visible cells and the output
# layer has 4 units while the label encoding above yields 3 codes; confirm
# which targets this search is meant to use.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:

            # Drop graph state left by the previous model so memory does not
            # grow with every configuration.
            keras.backend.clear_session()

            NAME = '{}-conv-{}-nodes-{}-dense-{}'.format(conv_layer, layer_size, dense_layer, int(time.time()))
            tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

            model = keras.Sequential()

            # First conv layer carries the input shape and has no pooling.
            model.add(Conv2D(layer_size, 3, activation='relu', input_shape=input_shape))

            # Remaining conv blocks: conv -> 2x2 max-pool -> dropout.
            for _ in range(conv_layer - 1):
                model.add(Conv2D(layer_size, 3, activation='relu'))
                model.add(MaxPool2D(2))
                model.add(Dropout(0.25))

            model.add(Flatten())
            for _ in range(dense_layer):
                model.add(Dense(layer_size, activation='relu'))

            model.add(Dense(4, activation='softmax'))
            model.summary()

            model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
            model.fit(X_train, y_train, batch_size=16, epochs=10, validation_split=0.2, callbacks=[tensorboard])


Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 250, 250, 64)      640       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 125, 125, 64)      0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 125, 125, 128)     73856     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 62, 62, 128)       0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 62, 62, 256)       295168    
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 31, 31, 256)       0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 31, 31, 256)     

<tensorflow.python.keras.callbacks.History at 0x7f09cc7ba820>