# Training
-----------------------------------

Here we're training on a fairly large dataset of infected and uninfected cell images. To download the dataset yourself, go [here](https://www.kaggle.com/iarunava/cell-images-for-detecting-malaria/download).

## Setting up the dataset

Follow the steps below to get your dataset set up.
 - Extract the downloaded zip
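 - Rename `cell_images` to `train_cell_images` and move it into a directory called `dataset` (the code below reads from `./dataset/train_cell_images/`)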
 - Create a directory called `model`. This is where the trained model will be saved.

## Importing libraries and config

In [13]:
import os
import cv2
import keras
import numpy as np
import sklearn
import tensorflow as tf
from PIL import Image
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.models import Sequential
from keras.backend.tensorflow_backend import set_session
from sklearn.model_selection import train_test_split

# Ask TensorFlow to grow GPU memory on demand for the first visible GPU.
# This is best-effort: with no GPU present there is simply nothing to configure.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except (RuntimeError, ValueError) as err:
        # set_memory_growth is documented to raise RuntimeError once the
        # runtime is already initialized and ValueError for an invalid device.
        # Training can still proceed, so report the problem and carry on.
        print("Could not enable GPU memory growth: {}".format(err))

## Loading data

In [2]:
PARASITIZED_DIR = "./dataset/train_cell_images/Parasitized/"
UNINFECTED_DIR = "./dataset/train_cell_images/Uninfected/"


def _augmented_samples(directory, file_names, add_blur):
    """Return resized and augmented pixel arrays for every readable image.

    Each readable image is resized to 50x50 and contributes, in order: the
    resized image, a 45-degree rotation, a 75-degree rotation and, when
    ``add_blur`` is true, a copy blurred with a 10x10 kernel.

    Args:
        directory: Directory containing the image files (with trailing slash).
        file_names: Names of the files inside ``directory`` to load.
        add_blur: Whether to also append the blurred variant of each image.

    Returns:
        A list of numpy arrays, one per generated sample.
    """
    samples = []
    for file_name in file_names:
        image = cv2.imread(directory + file_name)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot be
            # decoded as an image; skip such entries explicitly.
            continue
        resized = Image.fromarray(image, 'RGB').resize((50, 50))
        samples.append(np.array(resized))
        samples.append(np.array(resized.rotate(45)))
        samples.append(np.array(resized.rotate(75)))
        if add_blur:
            samples.append(cv2.blur(np.array(resized), (10, 10)))
    return samples


# Load infected images (label 0.0)
# NOTE(review): only the infected class receives the blurred variant, so the
# classes end up with 4 vs 3 samples per source image and blur itself is
# correlated with the label. Kept as-is to preserve the dataset; confirm
# whether this asymmetry is intentional.
Parasitized = os.listdir(PARASITIZED_DIR)
infected_samples = _augmented_samples(PARASITIZED_DIR, Parasitized, add_blur=True)

# Load uninfected images (label 1.0)
Uninfected = os.listdir(UNINFECTED_DIR)
uninfected_samples = _augmented_samples(UNINFECTED_DIR, Uninfected, add_blur=False)

data = infected_samples + uninfected_samples
labels = [0.0] * len(infected_samples) + [1.0] * len(uninfected_samples)

# Convert image pixels to numpy arrays for easy processing
cells = np.array(data)
labels = np.array(labels)

# Persist the prepared arrays, then read them back from disk
np.save("model/cells", cells)
np.save("model/labels", labels)

cells = np.load("model/cells.npy")
labels = np.load("model/labels.npy")

## Preprocessing data

In [3]:
# Apply one random permutation to both arrays so every image keeps its label
s = np.random.permutation(cells.shape[0])
cells, labels = cells[s], labels[s]

num_classes = np.unique(labels).size
len_data = cells.shape[0]

# Hold out a test set using train_test_split's default proportions
x_train, x_test, y_train, y_test = train_test_split(cells, labels)

# Scale the pixel values by 1/255 and store them as float32
x_train = x_train.astype(np.float32) / 255
x_test = x_test.astype(np.float32) / 255
train_len, test_len = x_train.shape[0], x_test.shape[0]

print(x_train.shape, y_train.shape, sep='\n')

(72339, 50, 50, 3)
(72339,)


## Create and save model

In [11]:
# Stop training early when validation accuracy stops improving by at least
# min_delta within the configured patience.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_accuracy', min_delta=1e-2, patience=2, verbose=1,
    ),
]

# Three conv/pool stages followed by a dense head with a single sigmoid output
model = Sequential([
    Conv2D(filters=16, kernel_size=3, padding="same", activation="relu", input_shape=(50, 50, 3)),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=32, kernel_size=3, padding="same", activation="relu"),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=64, kernel_size=3, padding="same", activation="relu"),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(500, activation="relu"),
    Dropout(0.2),
    Dense(1, activation="sigmoid"),
])
model.summary()

# Binary cross-entropy loss with the Adam optimizer, tracking accuracy
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model. The batch size is a power of two (512), which the original
# author found to train well on their GPU.
model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    verbose=1,
    callbacks=callbacks,
    validation_data=(x_test, y_test),
)

# evaluate() returns [loss, accuracy] for the metrics compiled above
accuracy = model.evaluate(x_test, y_test, verbose=1)
print('\n', 'Validation accuracy: ', accuracy[1])

model.save('./model/cells.h5')

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 50, 50, 16)        448       
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 25, 25, 32)        4640      
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 12, 12, 64)        18496     
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
dropout_21 (Dropout)         (None, 6, 6, 64)         

# Prediction
------------------

## Setting up prediction dataset
 - Create a directory called `test_cell_images` inside the `dataset` directory
 - Copy any two images of your choosing into this directory. Name them `test1.png` and `test2.png`.

## Predicting the type of cell

In [31]:
from tensorflow.keras.models import load_model

def convert_to_array(img):
    """Load an image file and turn it into a one-image batch for the model.

    The image is resized to 50x50 and its pixel values are divided by 255,
    mirroring the preprocessing applied to the training data.

    Args:
        img: Path of the image file to read.

    Returns:
        A float32 numpy array holding a single image, shape (1, 50, 50, 3).

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    im = cv2.imread(img)
    if im is None:
        # cv2.imread signals an unreadable file by returning None; fail with a
        # clear message instead of an obscure error further down.
        raise ValueError("Could not read image file: {}".format(img))
    resized = Image.fromarray(im, 'RGB').resize((50, 50))
    scaled = np.array(resized).astype('float32') / 255
    return np.array([scaled])


def get_cell_name(label):
    """Translate a numeric class label into its human-readable name.

    Args:
        label: Class label; 0 means infected and 1 means uninfected.

    Returns:
        "Infected" for 0, "Uninfected" for 1, and None for any other value.
    """
    names_by_label = ((0, "Infected"), (1, "Uninfected"))
    for known_label, name in names_by_label:
        if label == known_label:
            return name
    return None


def predict_cell(file):
    """Classify the cell image at ``file`` using the saved model.

    The model is loaded from './model/cells.h5' on every call, the image is
    preprocessed with convert_to_array, and the rounded prediction score is
    mapped to a class name with get_cell_name.

    Args:
        file: Path of the image file to classify.

    Returns:
        A sentence naming the predicted cell type.
    """
    classifier = load_model('./model/cells.h5')
    batch = convert_to_array(file)
    scores = classifier.predict(batch, verbose=3)
    # Round the single output score of the only image in the batch to 0 or 1
    predicted_label = round(scores[0][0])
    cell_name = get_cell_name(predicted_label)
    return "The predicted cell is an " + cell_name + " cell."


# Classify the two sample images: test1 is an infected cell, test2 is not
sample_images = (
    './dataset/test_cell_images/test1.png',
    './dataset/test_cell_images/test2.png',
)
for sample_image in sample_images:
    print(predict_cell(sample_image))


The predicted cell is an Infected cell.
The predicted cell is an Uninfected cell.
