<a href="https://colab.research.google.com/github/Sable-20/Machine-Learning/blob/main/colorectal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from google.colab import drive
import random, string
import pandas as pd

# Mount Google Drive so that checkpoints written later in the notebook
# persist beyond this Colab session.
drive.mount('/content/drive')

# Record the library versions used for this run. A bare expression is only
# echoed when it is the last statement of a cell, so print them explicitly.
print(f'TensorFlow {tf.__version__}, NumPy {np.__version__}')

# Seed NumPy and TensorFlow so shuffling and weight initialisation are
# repeatable. Python's `random` module is deliberately left unseeded: it is
# used further down to generate a unique run name for the checkpoints.
seed = 128
np.random.seed(seed)
tf.random.set_seed(seed)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [48]:
# Load the labelled images as (image, label) pairs together with the
# dataset metadata.
#
# shuffle_files is disabled on purpose: TFDS turns off deterministic reading
# when it is enabled, and the validation/test/train splits below are carved
# out with separate take()/skip() passes over this dataset. Those passes must
# all observe the same element order or examples could end up in more than
# one split. The full-size shuffle buffer below already randomises the order.
dataset, info = tfds.load(
    'colorectal_histology',
    split='train',
    as_supervised=True,
    with_info=True,
    shuffle_files=False,
)

# Shuffle once with a fixed seed and keep that order on every iteration
# (reshuffle_each_iteration=False) so the positional splits stay disjoint
# and are the same from run to run.
dataset = dataset.shuffle(buffer_size=5000, seed=seed, reshuffle_each_iteration=False)

# Scale pixel values to floats in [0, 1]; labels are passed through unchanged.
dataset = dataset.map(lambda img, lbl: (tf.cast(img, tf.float32) / 255.0, lbl))

In [49]:
# Quick sanity check: the pipeline type and element count, followed by the
# supervised (input, target) keys and the feature specification from the
# dataset metadata.
print(f'dataset type {type(dataset)} with {len(dataset)} images')
for metadata_view in (info.supervised_keys, info.features.items()):
    print(metadata_view)

dataset type <class 'tensorflow.python.data.ops.dataset_ops.MapDataset'> with 5000 images
('image', 'label')
dict_items([('image', Image(shape=(150, 150, 3), dtype=uint8)), ('label', ClassLabel(shape=(), dtype=int64, num_classes=8)), ('filename', Text(shape=(), dtype=string))])


In [50]:
batch_size = 64

# Split sizes, taken positionally from the shuffled dataset:
# validation first, then test, then training.
n_validation = 1000
n_test = 1000
n_train = 3000


def _make_split(source, start, count, shuffle_each_epoch=False):
    """Build a cached, batched and prefetched pipeline for one split.

    Args:
        source: the tf.data dataset to slice.
        start: number of leading elements to skip.
        count: number of elements to take after skipping.
        shuffle_each_epoch: when True, reshuffle the cached elements on every
            pass so that batches differ from epoch to epoch.

    Returns:
        A dataset yielding batches of `batch_size` elements.
    """
    split = source.skip(start).take(count).cache()
    if shuffle_each_epoch:
        # Shuffle after cache() so the cached contents stay fixed and only
        # the order in which they are served changes between epochs.
        split = split.shuffle(count)
    return split.batch(batch_size).prefetch(tf.data.AUTOTUNE)


validation_data = _make_split(dataset, 0, n_validation)
test_data = _make_split(dataset, n_validation, n_test)
# Only the training split is reshuffled; evaluation splits keep a fixed order.
train_data = _make_split(dataset, n_validation + n_test, n_train, shuffle_each_epoch=True)

In [51]:
# Model: a small CNN for 150x150 RGB images with 8 output classes.
#
# Five 3x3 convolution stages (64, 64, 128, 128, 128 filters), each followed
# by a 2x2 pooling layer (max pooling everywhere except the third stage, which
# uses average pooling), then a dense classifier head.

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer((150, 150, 3)),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.AveragePooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    # Light regularisation before the dense head.
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    # One softmax output per class.
    tf.keras.layers.Dense(8, activation='softmax')
], name="cnn_model")

# Labels are integer class ids, hence the sparse variant of the
# categorical cross-entropy loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" after the table).
model.summary()

Model: "cnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_5 (Conv2D)           (None, 148, 148, 64)      1792      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 74, 74, 64)       0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 72, 72, 64)        36928     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 average_pooling2d_1 (Averag  (None, 17, 17, 128)      0 

In [52]:
# Root folder on the mounted Drive for this project's artefacts.
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# so that any other cell referring to it keeps working.
dir = '/content/drive/My Drive/CNN Colorectal Histology'

# Random 12-letter suffix so checkpoints from different runs do not collide.
model_name = 'CNN'+''.join(random.sample(string.ascii_lowercase, 12))

# Stop when validation loss has not improved for 20 epochs and roll the model
# back to the best weights seen.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# The second half of the path is a plain (non-f) string: its placeholders are
# filled in by Keras with the epoch number and logged metrics at save time.
# Every placeholder must be closed with '}', otherwise formatting the path
# raises ValueError when the first checkpoint is written.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=f'{dir}/ckpts/colorectal_histology-{model_name}-' + '{epoch:02d}-{val_accuracy:0.4f}'
)

In [53]:
# Train for up to 100 epochs, validating after each one; early stopping and
# checkpointing are handled by the callbacks configured above.
history = model.fit(
    x=train_data,
    epochs=100,
    validation_data=validation_data,
    callbacks=[earlystop, checkpoint],
)

Epoch 1/100

KeyboardInterrupt: ignored

In [None]:
# Plot every metric recorded during training on one set of axes, with the
# y-axis restricted to the range [0, 1].
training_curves = pd.DataFrame(history.history)
ax = training_curves.plot(figsize=(8, 5))
ax.grid(True)
ax.set_ylim(0, 1)
plt.show()

# Evaluate on the held-out test split and print the returned metrics.
test_metrics = model.evaluate(test_data)
print(test_metrics)

In [None]:
# Flush pending writes to Google Drive and unmount it, then confirm to the
# user that the session's files have been synced.
drive.flush_and_unmount()
sync_notice = 'All changes made in this colab session should now be visible in Drive.'
print(sync_notice)