Copyright 2018 The TensorFlow Datasets Authors, Licensed under the Apache License, Version 2.0

In [1]:
!pip install -q tensorflow-datasets tensorflow

In [2]:
pip install keras_cv

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [11]:
import tensorflow as tf
import tensorflow_datasets as tfds
import keras
import keras_cv
import numpy as np
import random
#'train' is the only available split
ds = tfds.load('colorectal_histology', split='train', shuffle_files=True)


In [12]:
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Resizing, Rescaling
from keras_cv.layers import Grayscale

In [5]:
X = np.array([row['image'] for row in ds])


In [6]:
y = np.array([row['label'] for row in ds])
y = keras.utils.to_categorical(y)
z = y[0][:]

In [7]:
y = y[:,0]

# Data augmentation for tumors

In [8]:
tumor_images = X[np.where(y == 1)]
tumor_images.shape

(625, 150, 150, 3)

In [9]:
new_tumors = []
while len(new_tumors)<5000:
  random_index = random.randint(0,len(tumor_images)-1)
  random_tumor = tumor_images[random_index]
  rotated_tumor = tf.keras.preprocessing.image.random_rotation(x = random_tumor,
                                                              row_axis=0,
                                                              col_axis=1,
                                                              channel_axis=2,
                                                              rg = 360)
  new_tumors.append(rotated_tumor) 
  y = np.append(y, 1)

ary_tumors = np.array(new_tumors)

In [10]:
ary_tumors = np.array(new_tumors)
X = np.concatenate((X,ary_tumors))


Visual proof of the working rotation

In [18]:
from PIL import Image
import numpy as np

w, h = 512, 512
data = np.zeros((h, w, 3), dtype=np.uint8)
data[0:256, 0:256] = [255, 0, 0] # red patch in upper left
img = Image.fromarray(
    tf.keras.preprocessing.image.random_rotation(x = X[-1],
                                                  row_axis=0,
                                                  col_axis=1,
                                                  channel_axis=2,
                                                  rg = 360),
                       'RGB')
img.save('my.png')
img.show()

In [19]:
X.shape

(15000, 150, 150, 3)

# Grey scaling



In [None]:
def to_grey_scale(X):
  return np.dot(X[...,:3], [.3, .6, .1])

to_grey_scale(X)

In [10]:
grayed_images = []
for image in X:
  np.dot(image)

In [17]:
to_grayscale = keras_cv.layers.preprocessing.Grayscale()
augmented_images = to_grayscale(X)

# Manual shuffle data

In [49]:
data = [(augmented_images[i],y[i]) for i in range(len(y))]

In [50]:
random.shuffle(data)

In [51]:
augmented_images = np.array([data[i][0] for i in range(len(data))])
y = np.array([data[i][1] for i in range(len(data))])


In [52]:
(y[1],y[2],y[3])


(1.0, 1.0, 1.0)

# Manual train test split

In [53]:
assert len(augmented_images) == len(y)
train_test_ratio = .8
split_index = int(len(augmented_images)*train_test_ratio)

X_train = augmented_images[:split_index]
y_train = y[:split_index]
X_test = augmented_images[split_index:]
y_test = y[split_index:]


# Model

In [61]:
model = keras.models.Sequential([
    tf.keras.layers.Rescaling(scale = 1/255, input_shape=(150,150,1)),
    #keras_cv.layers.preprocessing.Grayscale(),
    Conv2D(16,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Conv2D(32,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Conv2D(64,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Conv2D(64,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Conv2D(64,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Conv2D(64,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Conv2D(64,3,padding="same",activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1,activation='softmax'),
])


In [62]:
model.compile(tf.keras.optimizers.SGD(learning_rate=0.01),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.Recall(), "accuracy"]
)

In [63]:
(X.shape,y.shape)

((10000, 150, 150, 3), (10000,))

In [64]:
#model.fit(augmented_images,y,batch_size=32, epochs=3, validation_split=.2)
model.fit(X_train, y_train,
          validation_data=(X_test,y_test),
          epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fcccef77dc0>

In [59]:
sum(np.where(model.predict(augmented_images) > .5))



array([   0,    1,    2, ..., 9997, 9998, 9999])