In [1]:
from random import shuffle
import tensorflow as tf 
import tensorflow.keras as keras
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential
from keras.layers import Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization, Dense
from keras.preprocessing.image import ImageDataGenerator
from sklearn import preprocessing
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import time
import sys
from tensorflow.python.keras.engine.data_adapter import train_validation_split

from tensorflow.python.keras.engine.sequential import relax_input_shape

In [2]:
# Locations of the image folders and of the metadata CSVs that index them.
train_image_path = './images/images/train/'
test_image_path = './images/images/test/'

train_csv = './assignment5_training_data_metadata.csv'
test_csv = './assignment5_test_data_metadata.csv'
df = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

# Integer class codes used for the rest of the notebook:
#   1 - Normal (also used for rows whose `type` is missing), 2 - Virus, 3 - bacteria.
# 'Stress-Smoking' rows are folded into code 2, so only three classes remain.
# NOTE(review): the author's original remark was that these rows should probably be
# dropped from the dataset instead - confirm which behaviour is intended.
TYPE_CODES = {'Normal': 1, 'Virus': 2, 'bacteria': 3, 'Stress-Smoking': 2}

# Assign the result back to the column. Calling fillna/replace with inplace=True on
# `df['type']` mutates a column selection, which pandas warns about and which no longer
# updates `df` once copy-on-write is active.
df['type'] = df['type'].fillna('Normal').replace(TYPE_CODES)

# Sanity check: number of training rows per class code.
unique, counts = np.unique(np.array(df['type']), return_counts=True)
print(dict(zip(unique, counts)))

{1: 1342, 2: 1409, 3: 2535}


In [3]:
img_height, img_width = 256, 256

# Filled by create_data(): [image_array, class_code] pairs and [image_array, id] pairs.
training_data = []
testing_data = []


def _load_gray_resized(path):
    """Read the image at `path` as grayscale and resize it to img_width x img_height.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file (cv2.imread returns None
            instead of raising, which would otherwise surface later as an obscure
            error inside cv2.resize).
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(path))
    # cv2.resize takes dsize as (width, height); the resulting array has
    # img_height rows and img_width columns.
    return cv2.resize(img, (img_width, img_height))


def create_data():
    """Populate `training_data` and `testing_data` from the metadata frames.

    Reads every file listed in df['image_name'] (paired with df['type']) and in
    df_test['image_name'] (paired with df_test['id']). Both lists are emptied first,
    so calling this function again rebuilds them instead of appending duplicates.

    Raises:
        FileNotFoundError: if any listed image cannot be read.
    """
    training_data.clear()
    testing_data.clear()

    for filename, filetype in zip(df['image_name'], df['type']):
        training_data.append([_load_gray_resized(train_image_path + filename), filetype])

    for filename, fileidx in zip(df_test['image_name'], df_test['id']):
        testing_data.append([_load_gray_resized(test_image_path + filename), fileidx])

In [4]:
# One-time cache build (disabled). When uncommented it reads every image through
# create_data(), shuffles the training pairs, stacks images/labels/ids into numpy
# arrays and pickles them under ./pickles/ (the same four files the next cell loads).
# NOTE(review): the reshape calls use (img_width, img_height) as the axis order; that
# has to match the array shape create_data() produces - the two are interchangeable
# only while img_width == img_height.
# create_data()

# import random
# random.shuffle(training_data)
# # random.shuffle(testing_data)

# X = []
# y = []
# X_test = []
# id_test = []

# for features, label in training_data:
#     X.append(features)
#     y.append(label)
    
# for features, idx in testing_data:
#     X_test.append(features)
#     id_test.append(idx)

# X = np.array(X).reshape(-1, img_width, img_height, 1)
# X_test = np.array(X_test).reshape(-1, img_width, img_height, 1)
# y = np.array(y)
# id_test = np.array(id_test)

# with open('./pickles/X.pickle', 'wb') as handle:
#     pickle.dump(X, handle)
# with open('./pickles/y.pickle', 'wb') as handle:
#     pickle.dump(y, handle)
# with open('./pickles/X_test.pickle', 'wb') as handle:
#     pickle.dump(X_test, handle)
# with open('./pickles/id_test.pickle', 'wb') as handle:
#     pickle.dump(id_test, handle)

In [5]:
def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as source:
        return pickle.load(source)


# Load the cached arrays from ./pickles/: training images and labels, then the
# test images and their ids.
X = _read_pickle('./pickles/X.pickle')
y = _read_pickle('./pickles/y.pickle')
X_test = _read_pickle('./pickles/X_test.pickle')
id_test = _read_pickle('./pickles/id_test.pickle')
    

In [6]:
def _to_unit_range(pixels):
    """Scale integer pixel data by 1/255 and return it as float32.

    Arrays that are not integer-typed are returned unchanged, so re-running this cell
    does not divide the data by 255 a second time. float32 is used instead of the
    float64 that `array / 255.0` would produce for integer input, halving memory use.
    NOTE(review): assumes the pickled images are stored with an integer dtype (they are
    built from cv2.imread output without any conversion) - confirm if the cache changes.
    """
    pixels = np.asarray(pixels)
    if np.issubdtype(pixels.dtype, np.integer):
        return pixels.astype(np.float32) / 255.0
    return pixels


X = _to_unit_range(X)
X_test = _to_unit_range(X_test)

In [7]:
# One-hot encode the integer class codes for the softmax / categorical_crossentropy head.
ohe = preprocessing.OneHotEncoder()
categories = y.reshape(-1, 1)  # the encoder expects one column per feature
# toarray() returns a plain ndarray; todense() returned the legacy np.matrix type,
# which was then handed to model.fit as the training target.
y_ohe = ohe.fit_transform(categories).toarray()
# Alias kept because the class-weight cell reads `y_1`.
y_1 = y_ohe

In [8]:
from sklearn.utils.class_weight import compute_class_weight

# Column index of the hot entry in each row, i.e. the 0-based class index that Keras
# uses as the key of its class_weight dict.
y_integers = np.argmax(y_1, axis=1)
# `classes` and `y` must be passed by keyword: current scikit-learn releases reject
# them as positional arguments.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_integers),
    y=y_integers,
)
# {class index: weight}, in the order of np.unique(y_integers).
d_class_weights = dict(enumerate(class_weights))



In [9]:
# Display the class-index -> weight mapping that is passed to model.fit(class_weight=...).
d_class_weights

{0: 1.3129657228017884, 1: 1.2505322924059616, 2: 0.695069033530572}

In [10]:
# Disabled single-model baseline: a fixed CNN with six Conv2D layers (32 to 512
# filters) and one 128-unit dense layer, compiled and trained with the same optimizer,
# loss and callbacks as the grid-search cell that follows. Kept for reference only.
# NAME = 'testing_{}'.format(time.time())
# tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

# model = Sequential()

# model.add(Conv2D(32, (3,3), activation='relu', input_shape=X.shape[1:]))

# model.add(Conv2D(32, (3,3), activation='relu'))
# model.add(MaxPool2D(2))

# model.add(Conv2D(64, (3,3), activation='relu'))
# model.add(MaxPool2D(2))

# model.add(Conv2D(128, (3,3), activation='relu'))
# model.add(MaxPool2D(2))

# model.add(Conv2D(256, (3,3), activation='relu'))
# model.add(MaxPool2D(2))

# model.add(Conv2D(512, (3,3), activation='relu'))
# model.add(MaxPool2D(2))


# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dense(3, activation='softmax'))

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# model.summary()

# early = EarlyStopping(monitor='val_loss', mode='min', patience=3)
# learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.3, min_lr=0.000001)
# callbacks_list = [ early, learning_rate_reduction, tensorboard ]

# history = model.fit(X, y_ohe, epochs=25, validation_split=0.2, batch_size=32, class_weight=d_class_weights, callbacks=[callbacks_list])



In [None]:
# Architecture grid: number of hidden dense layers, units/filters per layer, and
# number of convolution layers.
dense_layers = [0, 1, 2, 3]
layer_sizes = [64, 128, 256, 512, 1024]
conv_layers = [4, 5, 6]

# Number of leading grid combinations to skip. The previous flag/idx counter skipped
# exactly the first five; set to 0 to train the whole grid.
N_COMBOS_TO_SKIP = 5

# Same iteration order as three nested loops: dense_layer outermost, conv_layer innermost.
search_space = [
    (dense_layer, layer_size, conv_layer)
    for dense_layer in dense_layers
    for layer_size in layer_sizes
    for conv_layer in conv_layers
]

for dense_layer, layer_size, conv_layer in search_space[N_COMBOS_TO_SKIP:]:
    NAME = '{}-conv-{}-nodes-{}-dense-{}'.format(conv_layer, layer_size, dense_layer, int(time.time()))
    try:
        # Release the Keras state left behind by the previous candidate; models built
        # in a loop otherwise accumulate memory from one iteration to the next.
        keras.backend.clear_session()

        tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

        model = keras.Sequential()

        model.add(Conv2D(layer_size, 3, activation='relu', input_shape=X.shape[1:]))

        # Remaining conv layers, each followed by 2x2 max pooling.
        for _ in range(conv_layer - 1):
            model.add(Conv2D(layer_size, 3, activation='relu'))
            model.add(MaxPool2D(2))
#             model.add(Dropout(0.25))

        model.add(Flatten())
        for _ in range(dense_layer):
            model.add(Dense(layer_size, activation='relu'))
        model.add(Dense(3, activation='softmax'))

        model.summary()

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        early = EarlyStopping(monitor='val_loss', mode='min', patience=3)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.3, min_lr=0.000001)
        callbacks_list = [early, learning_rate_reduction, tensorboard]

        # Keep the History object: the later cell that saves the model by its final
        # val_accuracy reads `history`. The callbacks are passed as a flat list.
        history = model.fit(
            X,
            y_ohe,
            epochs=25,
            validation_split=0.2,
            batch_size=32,
            class_weight=d_class_weights,
            callbacks=callbacks_list,
        )

        model.save('./models/{}'.format(NAME))
    except Exception as e:  # best-effort: e.g. an OOM on a large candidate must not stop the sweep
        print('{} failed: {}'.format(NAME, e))
        continue

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 64)      640       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 252, 252, 64)      36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 126, 126, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 124, 124, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 60, 60, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 30, 30, 64)        0

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 6/25
INFO:tensorflow:Assets written to: ./models/3-conv-128-nodes-0-dense-1618085068/assets
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 254, 254, 128)     1280      
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 252, 252, 128)     147584    
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 126, 126, 128)     0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 124, 124, 128)     147584    
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 62, 62, 128)       0         
__________________

Epoch 1/25
  1/133 [..............................] - ETA: 11:41 - loss: 1.0341 - accuracy: 0.5000

In [None]:
# Save the current model under a name that embeds its last-epoch validation accuracy.
# Requires `history` (the object returned by model.fit) to be in scope; verify that the
# training cell assigns it.
final_val_accuracy = history.history['val_accuracy'][-1]
model.save('./models/softamx_{:.4}'.format(final_val_accuracy))

In [None]:
# Optional (disabled): load a previously saved model instead of using the one trained above.
# model = keras.models.load_model('./models/sigmoid_testing_1618075828.1710396')

In [None]:
# Predicted class index (0-based) for every test image. Sequential.predict_classes()
# has been removed from tf.keras; the argmax over the softmax output is its replacement
# for a multi-class model.
pred = np.argmax(model.predict(X_test), axis=-1)

In [None]:
# Translate 0-based class indices back to the original class codes. Column k of the
# one-hot targets corresponds to ohe.categories_[0][k], so indexing the fitted
# encoder's categories replaces the hard-coded `+ 1`, which was only correct while
# the codes were exactly 1, 2, 3.
pred = np.asarray(ohe.categories_[0])[pred]

In [None]:
# Assemble the submission table (one row per test image: its id and predicted class
# code) and write it to prediction.csv without the index column.
submission_columns = {'id': id_test, 'type': pred}
output = pd.DataFrame(submission_columns)
output.to_csv('prediction.csv', index=False)