In [None]:
import os

# TF_USE_LEGACY_KERAS must be in the environment *before* TensorFlow is first
# imported (DeepChem may import TensorFlow as well); setting it after the
# imports, as this cell used to, comes too late to select legacy Keras.
# '1' is the value TensorFlow documents for this switch.
os.environ['TF_USE_LEGACY_KERAS'] = '1'

import re

import numpy as np
import pandas as pd
import deepchem as dc
import tensorflow as tf
import tensorflow.keras.layers as layers

image_dir = 'BBBC005_v1_images'
files = []
labels = []

# The label of each image is the integer in the `_C<number>_` field of its
# file name; capture only the digits so a malformed name fails loudly.
count_pattern = re.compile(r'_C(\d+)_')

# Walk the image folder in sorted order: os.listdir() returns entries in
# arbitrary order, and a stable order is needed for the seeded random split
# further down to select the same images on every run and machine.
for file in sorted(os.listdir(image_dir)):
    if not file.endswith('.TIF'):
        continue
    match = count_pattern.search(file)
    if match is None:
        raise ValueError(f"Cannot parse a label from image file name: {file!r}")
    files.append(os.path.join(image_dir, file))     # path to the image on disk
    labels.append(int(match.group(1)))              # label parsed from the file name

if not files:
    raise FileNotFoundError(f"No .TIF images found in {image_dir!r}")

# ImageDataset receives the image paths as its inputs (X) and the parsed
# labels as its targets (y), one label per image in the same order.
dataset = dc.data.ImageDataset(files, np.array(labels))

# Report whether TensorFlow can see a GPU before starting the long training run.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
print(gpus)

In [None]:
# Randomly partition the images into training, validation and test subsets.
# A fixed seed is passed so the shuffle is repeatable for a given dataset order.
SPLIT_SEED = 123
splitter = dc.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
    dataset,
    seed=SPLIT_SEED,
)


In [None]:
def conv_block(inputs, filters):
    """Apply one down-sampling stage: 5x5 stride-2 convolution, batch norm, ReLU.

    NOTE(review): the convolution already applies ReLU, so the activation runs
    both before and after batch normalization. Kept as-is to preserve the
    reported results; consider dropping one of the two.
    """
    convolved = layers.Conv2D(
        filters,
        kernel_size=5,
        strides=2,
        activation=tf.nn.relu,
    )(inputs)
    normalized = layers.BatchNormalization()(convolved)
    return layers.ReLU()(normalized)


# Single-channel 520x696 image input.
features = tf.keras.Input(shape=(520, 696, 1))

# Five convolutional stages with a growing number of filters.
prev_layer = features
for num_outputs in (16, 32, 64, 128, 256):
    prev_layer = conv_block(prev_layer, num_outputs)

# Regularize, then flatten and regress to a single scalar prediction.
prev_layer = layers.Dropout(0.2)(prev_layer)
flattened = layers.Flatten()(prev_layer)
output = layers.Dense(1)(flattened)

keras_model = tf.keras.Model(inputs=features, outputs=output)

# Learning rate starts at 0.001 and decays by a factor of 0.96 per 2000 steps.
learning_rate = dc.models.optimizers.ExponentialDecay(
    0.001,
    decay_steps=2000,
    decay_rate=0.96,
)

# Wrap the Keras network in a DeepChem model so it can train on DeepChem datasets.
# Author's rationale for the loss: cell counts have no natural upper bound,
# so L2 loss is appropriate.
count_model = dc.models.KerasModel(
    keras_model,
    loss=dc.models.losses.L2Loss(),
    learning_rate=learning_rate,
    model_dir='model',
)



In [None]:
# Train on the training split for 100 epochs.
# Note to self: this took about 75 minutes.
# fit() returns a loss value; keep it and echo it as the cell output.
final_loss = count_model.fit(train_dataset, nb_epoch=100)
final_loss


3.6264682006835938

1st attempt: RMSE of 56.6

learning rate: expDecay(0.001,0.9,250)
loss = L2Loss

2nd attempt: RMSE of 54

learning_rate=0.001, decay_steps=2000, decay_rate=0.96

3rd attempt: RMSE of 3

added batch normalization and ReLU and Dropout layer.

In [19]:
# Score the trained model on the held-out test split.
# The predictions are flattened to 1-D so they can be subtracted
# element-wise from the stored test labels.
y_pred = count_model.predict(test_dataset).flatten()
residuals = y_pred - test_dataset.y

# Root-mean-square error between predictions and labels.
rmse = np.sqrt(np.mean(np.square(residuals)))
print(rmse)

3.2151023973662447
