# Bacterial Classification

## Google Drive

In [None]:
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    COLAB = True
    print("Note: using Google CoLab")
    %tensorflow_version 2.x
except:
    print("Note: not using Google CoLab")
    COLAB = False
    
%cd drive/My Drive/projects/GDL_code

## imports

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt

import tensorflow.keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, BatchNormalization, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.initializers import RandomNormal

from utils.loaders import load_bacteria

## configuration

In [None]:
# Side length (in pixels) of the square images used by the network input.
# The same value is passed to the loader for both size arguments.
GENERATE_SQUARE = 250
IMAGE_CHANNELS = 3

# Data location and training hyper-parameters.
DATA_PATH = '/content/drive/My Drive/projects/GDL_code/data/bacteria/'
EPOCHS = 50
BATCH_SIZE = 32

print(f"Will generate {GENERATE_SQUARE}px square images.")

# Run parameters: outputs of this run live under run/<SECTION>/<RUN_ID>_<DATA_NAME>.
SECTION = 'cnn'
RUN_ID = '0001'
DATA_NAME = 'bacteria'
RUN_FOLDER = f'run/{SECTION}/{RUN_ID}_{DATA_NAME}'

# makedirs creates any missing parent folders (e.g. 'run/cnn') and, with
# exist_ok=True, also fills in sub-folders that are missing from a run folder
# that already exists, instead of skipping them or raising.
for subfolder in ('viz', 'images', 'weights'):
    os.makedirs(os.path.join(RUN_FOLDER, subfolder), exist_ok=True)

# Alternative value: 'load'. Not read by any cell visible in this notebook.
mode = 'build'

## load & preprocess images

In [None]:
# Load the training split through the project loader. For a large image
# dataset this initial preprocessing can take a while.
# NOTE(review): the loader is expected to cache the preprocessed data as a
# NumPy binary file (rather than a pickle, which can struggle with very large
# files) - confirm against utils.loaders; it is not visible from this notebook.
training_data = load_bacteria(
    DATA_PATH,
    GENERATE_SQUARE,
    GENERATE_SQUARE,
    IMAGE_CHANNELS,
    train=True,
)

# Quick visual check that loading worked: show element 0 of sample 5.
plt.imshow(training_data[5][0])

In [None]:
# Element 0 of every sample goes into one array shaped
# (N, GENERATE_SQUARE, GENERATE_SQUARE, IMAGE_CHANNELS); element 1 of every
# sample goes into a parallel array.
trainImages = np.reshape(
    np.array([sample[0] for sample in training_data]),
    (-1, GENERATE_SQUARE, GENERATE_SQUARE, IMAGE_CHANNELS),
)
trainLabels = np.array([sample[1] for sample in training_data])

## architecture

In [None]:
# Hyper-parameters shared by the layers below.
weight_init = RandomNormal(mean=0., stddev=0.02)
batch_norm_momentum = 0.9
dropout_rate = 0.25

input_layer = Input(
    shape=(GENERATE_SQUARE, GENERATE_SQUARE, IMAGE_CHANNELS),
    name='model_input')

x = input_layer

# Four convolutional stages that differ only in their filter count. Each
# stage downsamples twice (stride-2 convolution, then 2x2 max pooling) and
# finishes with batch normalisation. Layers are created in the same order as
# when written out one by one.
for n_filters in (32, 64, 96, 64):
    x = Conv2D(
        filters=n_filters,
        kernel_size=(3, 3),
        activation='relu',
        strides=2,
        padding='same',
        kernel_initializer=weight_init)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization(momentum=batch_norm_momentum)(x)

x = Dropout(rate=dropout_rate)(x)

# Classifier head: flatten the feature maps, then two dense layers with
# dropout in between.
x = Flatten()(x)
x = Dense(256, activation='relu', kernel_initializer=weight_init)(x)
x = Dropout(rate=dropout_rate)(x)
x = Dense(128, activation='relu', kernel_initializer=weight_init)(x)

# Two-way softmax output.
output_layer = Dense(2, activation='softmax', kernel_initializer=weight_init)(x)

model = Model(input_layer, output_layer)

## training

In [None]:
# Configure training with the Adam optimizer and report accuracy.
# NOTE(review): binary cross-entropy is paired here with a 2-unit softmax
# output; this presumably expects labels with two columns - confirm against
# what load_bacteria returns.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the full training arrays using the configured batch size and
# epoch count, printing progress for every epoch.
model.fit(
    x=trainImages,
    y=trainLabels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
)

## testing

In [None]:
# Load the held-out split (train=False) through the same project loader,
# with the same image size and channel count as the training data. For a
# large image dataset this initial preprocessing can take a while.
# NOTE(review): the loader is expected to cache the preprocessed data as a
# NumPy binary file (rather than a pickle, which can struggle with very large
# files) - confirm against utils.loaders; it is not visible from this notebook.
test_data = load_bacteria(
    DATA_PATH,
    GENERATE_SQUARE,
    GENERATE_SQUARE,
    IMAGE_CHANNELS,
    train=False,
)

# Quick visual check that loading worked: show element 0 of sample 3.
plt.imshow(test_data[3][0])

In [None]:
# Element 0 of every test sample goes into one array shaped
# (N, GENERATE_SQUARE, GENERATE_SQUARE, IMAGE_CHANNELS); element 1 of every
# sample goes into a parallel array.
testImages = np.reshape(
    np.array([sample[0] for sample in test_data]),
    (-1, GENERATE_SQUARE, GENERATE_SQUARE, IMAGE_CHANNELS),
)
testLabels = np.array([sample[1] for sample in test_data])

# Evaluate silently on the test arrays, then print the accuracy as a percentage.
loss, acc = model.evaluate(x=testImages, y=testLabels, verbose=0)
print(acc * 100)