## Intel & MobileODT Cervical Cancer Screening

I am working on this project for a Kaggle competition:

https://www.kaggle.com/c/intel-mobileodt-cervical-cancer-screening

My goal is accurately identifie a woman’s cervix type (type 1, type 2 or type 3) based on the image. As a rtaning set I am using about 1500 images (about 500 images for every type of cancer).

In [4]:
import tensorflow as tf
import cv2                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
import os                  # dealing with directories
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion

In [2]:
TRAIN_DIR1 = 'train/Type_1'
TRAIN_DIR2 = 'train/Type_2'
TRAIN_DIR3 = 'train/Type_3'
TEST_DIR = 'test'
IMG_SIZE = 50
LR = 1e-3

In [2]:
training_data = []

In [4]:
pixel_depth = 255.0 
def create_train_data(TRAIN_DIR, label):
    for img in tqdm(os.listdir(TRAIN_DIR)):
        if not img[0].isdigit():
            continue
        path = os.path.join(TRAIN_DIR,img)
        img = (cv2.imread(path,cv2.COLOR_BGR2RGB) - pixel_depth / 2) / pixel_depth
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        training_data.append([np.array(img),np.array(label)])

In [5]:
create_train_data(TRAIN_DIR1, [1.0,0.0,0.0])
create_train_data(TRAIN_DIR2, [0.0,1.0,0.0])
create_train_data(TRAIN_DIR3, [0.0,0.0,1.0])

100%|██████████| 251/251 [03:37<00:00,  1.39it/s]
100%|██████████| 782/782 [10:59<00:00,  1.22it/s]
100%|██████████| 451/451 [05:49<00:00,  1.55it/s]


We have 251 pictures of Type 1, 782 pictures of Type 2, and 451 of Type 3. Thus, if we always will predict Type 2, we will have accuracy about 53%

In [7]:
shuffle(training_data)
np.save('train_data_50_50_color.npy', training_data)

In [4]:
train_data = np.load('train_data_50_50_color.npy')

train = train_data[:-500]
test = train_data[-250: ]

In [5]:
X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,3)
Y = np.array([i[1] for i in train])

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,3)
test_y = np.array([i[1] for i in test])

test_x = test_x.reshape((-1, IMG_SIZE * IMG_SIZE * 3)).astype(np.float32)
X = X.reshape((-1, IMG_SIZE * IMG_SIZE * 3)).astype(np.float32)

In [6]:
print('Training set', X.shape, Y.shape)
print('Test set', test_x.shape, test_y.shape)

Training set (981, 7500) (981, 3)
Test set (250, 7500) (250, 3)


In [7]:
n_nodes_hl1 = 1500
n_nodes_hl2 = 1500
n_nodes_hl3 = 1500

n_classes = 3
batch_size = 100
hm_epochs = 200

In [8]:
x = tf.placeholder('float')
y = tf.placeholder('float')

hidden_1_layer = {'f_fum':n_nodes_hl1,
                  'weight':tf.Variable(tf.random_normal([len(X[0]), n_nodes_hl1])),
                  'bias':tf.Variable(tf.random_normal([n_nodes_hl1]))}

hidden_2_layer = {'f_fum':n_nodes_hl2,
                  'weight':tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                  'bias':tf.Variable(tf.random_normal([n_nodes_hl2]))}

hidden_3_layer = {'f_fum':n_nodes_hl3,
                  'weight':tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                  'bias':tf.Variable(tf.random_normal([n_nodes_hl3]))}

output_layer = {'f_fum':None,
                'weight':tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                'bias':tf.Variable(tf.random_normal([n_classes])),}

In [9]:
def neural_network_model(data):

    l1 = tf.add(tf.matmul(data,hidden_1_layer['weight']), hidden_1_layer['bias'])
    l1 = tf.nn.relu(l1)

    l2 = tf.add(tf.matmul(l1,hidden_2_layer['weight']), hidden_2_layer['bias'])
    l2 = tf.nn.relu(l2)

    l3 = tf.add(tf.matmul(l2,hidden_3_layer['weight']), hidden_3_layer['bias'])
    l3 = tf.nn.relu(l3)

    output = tf.matmul(l3,output_layer['weight']) + output_layer['bias']

    return output

In [10]:
def train_neural_network(x):
	prediction = neural_network_model(x)
	cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits = prediction,labels = y) )
	optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

	with tf.Session() as sess:
		sess.run(tf.initialize_all_variables())
	    
		for epoch in range(hm_epochs):
			epoch_loss = 0
			i=0
			while i < len(X):
				start = i
				end = i+batch_size
				batch_x = np.array(X[start:end])
				batch_y = np.array(Y[start:end])

				_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
				                                              y: batch_y})
				epoch_loss += c
				i+=batch_size
				
			if (epoch % 10 == 0): print('Epoch', epoch+1, 'completed out of',hm_epochs,'loss:',epoch_loss)
		correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
		accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

		print('Accuracy:',accuracy.eval({x:test_x, y:test_y}))

In [11]:
train_neural_network(x)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Epoch 1 completed out of 200 loss: 2472224.32812
Epoch 11 completed out of 200 loss: 372179.392822
Epoch 21 completed out of 200 loss: 0.0
Epoch 31 completed out of 200 loss: 0.0
Epoch 41 completed out of 200 loss: 0.0
Epoch 51 completed out of 200 loss: 0.0
Epoch 61 completed out of 200 loss: 0.0
Epoch 71 completed out of 200 loss: 0.0
Epoch 81 completed out of 200 loss: 0.0
Epoch 91 completed out of 200 loss: 0.0
Epoch 101 completed out of 200 loss: 0.0
Epoch 111 completed out of 200 loss: 0.0
Epoch 121 completed out of 200 loss: 0.0
Epoch 131 completed out of 200 loss: 0.0
Epoch 141 completed out of 200 loss: 0.0
Epoch 151 completed out of 200 loss: 0.0
Epoch 161 completed out of 200 loss: 0.0
Epoch 171 completed out of 200 loss: 0.0
Epoch 181 completed out of 200 loss: 0.0
Epoch 191 completed out of 200 loss: 0.0
Accuracy: 0.46


In [13]:
def train_neural_network(x):
	prediction = neural_network_model(x)
	cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits = prediction,labels = y) )
	optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

	with tf.Session() as sess:
		sess.run(tf.initialize_all_variables())
	    
		for epoch in range(hm_epochs):
			epoch_loss = 0
			i=0
			while i < len(X):
				start = i
				end = i+batch_size
				batch_x = np.array(X[start:end])
				batch_y = np.array(Y[start:end])

				_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
				                                              y: batch_y})
				epoch_loss += c
				i+=batch_size
				
			if (epoch % 10 == 0): 
				print('Epoch', epoch+1, 'completed out of',hm_epochs,'loss:',epoch_loss)
				correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
				accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
				print('Accuracy:',accuracy.eval({x:test_x, y:test_y}))

In [14]:
train_neural_network(x)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Epoch 1 completed out of 200 loss: 2681276.04688
Accuracy: 0.352
Epoch 11 completed out of 200 loss: 5826.27813721
Accuracy: 0.436
Epoch 21 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 31 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 41 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 51 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 61 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 71 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 81 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 91 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 101 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 111 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 121 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 131 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 141 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch 151 completed out of 200 loss: 0.0
Accuracy: 0.428
Epoch

In [4]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

In [28]:
train_data = np.load('train_data_50_50_gray.npy')

train = train_data[:-500]
test = train_data[-500: ]

X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1).astype(np.float32)
Y = np.array([i[1] for i in train])

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1).astype(np.float32)
test_y = np.array([i[1] for i in test])

In [36]:
tf.reset_default_graph()
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 128, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 3, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')

In [30]:
MODEL_NAME = 'types1_3-{}-{}.model'.format(LR, '2conv-basic')

In [31]:
model.fit({'input': X}, {'targets': Y}, n_epoch=3, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

Training Step: 44  | total loss: [1m[32m1.00446[0m[0m | time: 3.546s
| Adam | epoch: 003 | loss: 1.00446 - acc: 0.5399 -- iter: 896/951
Training Step: 45  | total loss: [1m[32m0.99751[0m[0m | time: 4.807s
| Adam | epoch: 003 | loss: 0.99751 - acc: 0.5464 | val_loss: 1.00470 - val_acc: 0.5260 -- iter: 951/951
--


In [3]:
IMG_SIZE = 100 # I am going to try increase the image size

training_data = []
train_data = np.load('train_data_100_100_gray.npy')

train = train_data[:-500]
test = train_data[-500: ]

X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1).astype(np.float32)
Y = np.array([i[1] for i in train])

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1).astype(np.float32)
test_y = np.array([i[1] for i in test])

NameError: name 'np' is not defined

In [6]:
tf.reset_default_graph()
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 128, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 3, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')

In [7]:
MODEL_NAME = 'types1_3-{}-{}.model'.format(LR, '2conv-basic_100_100')