## Import the required packages

In [26]:
# Genric Libraries
import cv2
import re
import os
import numpy as np
import shutil
import matplotlib.pyplot as plt
%matplotlib inline

# Keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
K.set_image_dim_ordering('th')
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

### Crop the images to make them ready for modeling

In [27]:
# Choose a fixed width and height of the image as that is required for training
width = 64
height = 64

In [30]:
# Cropping the images

#Give the three directories for stage1, stage3 and normal cells. Choose any one sample
dirc1="Data/cancer cell data/b_all_1_stage3/sample1/"
dirc2="Data/cancer cell data/rs411_stage1/sample2/"
dirc3="Data/normal/b_cells_control/qpi/sample6/"
dirs1=[dirc1+s for s in os.listdir(dirc1)]
dirs2=[dirc2+s for s in os.listdir(dirc2)]
dirs3=[dirc3+s for s in os.listdir(dirc3)]
dirs=(dirs1, dirs2, dirs3)

stage3=[]
stage1=[]
normal=[]
for dirs_list in sorted(dirs):
    
    for directory in dirs_list:
        #reading the image 
        idx=0
        image = cv2.imread(directory)
        image = image[85:540, 100:710]
        edged = cv2.Canny(image, 10, 250)

        #applying closing function 
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
        closed = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel)

        #finding_contours 
        (cnts, _) = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for c in cnts:
            x,y,w,h = cv2.boundingRect(c)
            if w>50 and h>50:
                idx+=1
                new_img=image[y:y+h,x:x+w]
                if new_img.shape[0] != image.shape[0] and new_img.shape[1] != image.shape[1]:
                    new_img=cv2.resize(new_img, (width,height))
                    if directory.find('stage3')>0:
                        stage3.append(new_img)
                    elif directory.find('stage1')>0:
                        stage1.append(new_img)
                    else:
                        normal.append(new_img)
                        
stage3=np.array(stage3)
stage1=np.array(stage1)
normal=np.array(normal)

In [31]:
print(stage3.shape)
print(stage1.shape)
print(normal.shape)

(93, 64, 64, 3)
(95, 64, 64, 3)
(42, 64, 64, 3)


### Preparing data for modeling

In [5]:
# Create X and y datasets
X = np.concatenate([stage3, stage1, normal], axis=0)
y = np.concatenate([np.full(stage3.shape[0], 3),np.full(stage1.shape[0], 1),np.full(normal.shape[0], 0)])

In [6]:
# Train test split
indices = np.random.permutation(X.shape[0])
trainObsCount=int(0.6*X.shape[0])
training_idx, test_idx = indices[:trainObsCount], indices[trainObsCount:]
X_train, X_test = X[training_idx,:], X[test_idx,:]
y_train, y_test = y[training_idx], y[test_idx]

In [7]:
# Just check for shapes and counts
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))

(158, 64, 64, 3)
(106, 64, 64, 3)
(158,)
(106,)
(array([0, 1, 3]), array([36, 61, 61]))
(array([0, 1, 3]), array([26, 41, 39]))


In [8]:
# Reshape to be [samples][pixels][width][height]
X_train = X_train.reshape(X_train.shape[0], 3, width, height).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 3, width, height).astype('float32')
# Normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255
# One hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

### Do modeling and test for accuracies

In [9]:
def cancer_model():
    # create model
    model = Sequential()
    model.add(Conv2D(50, (8, 8), input_shape=(3, 64, 64), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(30, (5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return(model)

In [10]:
model = cancer_model()
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=30, batch_size=20)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("CNN Misclassification Error for Test dataset: %.2f%%" % (100-scores[1]*100))

Train on 158 samples, validate on 106 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CNN Misclassification Error for Test dataset: 22.64%


In [11]:
model.metrics_names

['loss', 'acc']

In [12]:
scores # the names of the values are as displayed by the output of the above cell

[0.5525172816694908, 0.7735849067849933]