In [None]:
#install kaggle
!pip install -q kaggle

In [None]:
# Upload the Kaggle API key (kaggle.json) from the local machine.
from google.colab import files

# Bind the result instead of leaving the call as the cell's last expression:
# upload() returns a dict of {file name: uploaded bytes}, so echoing it would
# write the API key into the saved notebook output.
uploaded = files.upload()
print("Uploaded:", ", ".join(uploaded))

In [None]:
#ceate kaggle directory in user's default path
!mkdir ~/.kaggle

In [None]:
# Copy the uploaded kaggle.json from the working directory into ~/.kaggle/
# (the chmod cell below expects it at ~/.kaggle/kaggle.json).
!cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the API key file to owner read/write only (mode 600).
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the paultimothymooney/breast-histopathology-images dataset with the
# Kaggle CLI; the next cell unzips breast-histopathology-images.zip.
!kaggle datasets download -d paultimothymooney/breast-histopathology-images

In [None]:
#unzip the imported kaggle breast cancer dataset
!unzip breast-histopathology-images.zip

In [None]:
#ceate dataset directory
!mkdir /content/dataset/

In [None]:
#ceate dataset/benign directory
!mkdir /content/dataset/benign/

In [None]:
#ceate dataset/malignant directory
!mkdir /content/dataset/malignant/

In [None]:
# Delete /content/IDC_regular_ps50_idx5 before the dataset folders are filled.
# NOTE(review): presumably a nested duplicate copy of the extracted images - confirm.
# The later glob("/content/**/...", recursive=True) calls search everything
# under /content, so any 0/ or 1/ PNG folders left here would be matched too.
!rm -rf /content/IDC_regular_ps50_idx5

In [None]:
#import necessary Dependencies
import glob
import shutil
import os
import math
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from IPython.display import HTML

Take a balanced sample of images (up to 20,000 per class) from the original dataset, so that neither class is under-represented and no later oversampling or undersampling is needed.

In [None]:
#create benign image dataset without duplicates

# Collect every PNG stored in a "0" (benign) folder anywhere below /content.
# glob returns paths in arbitrary filesystem order, so sort them to make the
# selected sample reproducible between runs.
benign_dir = "/content/dataset/benign/"
candidates = sorted(glob.glob("/content/**/0/*.png", recursive=True))

# Only top the folder up to 20000 images. Without this cap, re-running the cell
# would move a further 20000 files and unbalance the two classes.
remaining = max(0, 20000 - len(glob.glob(benign_dir + "*.png")))
png_files = candidates[:remaining]

# Move the png files to the dataset folder, skipping names already present there
for png_file in png_files:
    if not os.path.exists(benign_dir + os.path.basename(png_file)):
        shutil.move(png_file, benign_dir)

In [None]:
#create malignant image dataset without duplicates

# Collect every PNG stored in a "1" (malignant) folder anywhere below /content.
# glob returns paths in arbitrary filesystem order, so sort them to make the
# selected sample reproducible between runs.
malignant_dir = "/content/dataset/malignant/"
candidates = sorted(glob.glob("/content/**/1/*.png", recursive=True))

# Only top the folder up to 20000 images. Without this cap, re-running the cell
# would move a further 20000 files and unbalance the two classes.
remaining = max(0, 20000 - len(glob.glob(malignant_dir + "*.png")))
png_files = candidates[:remaining]

# Move the png files to the dataset folder, skipping names already present there
for png_file in png_files:
    if not os.path.exists(malignant_dir + os.path.basename(png_file)):
        shutil.move(png_file, malignant_dir)

In [None]:
#count benign and malignant images

# Print the number of PNG files in each class folder, benign first and
# malignant second. After the loop, png_files / num_png_files hold the
# malignant listing and count.
for pattern in ("/content/dataset/benign/*.png", "/content/dataset/malignant/*.png"):
    png_files = glob.glob(pattern)
    num_png_files = len(png_files)
    print(num_png_files)


In [None]:
# Report the disk usage of /content/dataset and its subdirectories in
# human-readable units (-h).
!du -h /content/dataset/

In [None]:
# Constants shared by the data pipeline and the model
BATCH_SIZE = 32   # images per batch produced by the dataset loader
IMG_SIZE = 50     # images are loaded/resized to IMG_SIZE x IMG_SIZE pixels
CHANNELS = 3      # colour channels in the model input shape
EPOCHS = 20       # presumably the number of training epochs (training cell not shown here)

In [None]:
# Import image data into a tensorflow dataset object.
# The absolute path matches the /content/dataset/ folders created and filled
# above, so the cell does not depend on the current working directory.
# NOTE(review): shuffle=True presumably reshuffles on every pass over the
# dataset; confirm that the take()/skip() splits further down stay disjoint.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/dataset",
    seed=111,
    shuffle=True,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
)

In [None]:
# Keep the class names reported by the dataset loader; the plotting cell below
# uses the integer labels to index into this list. The bare expression on the
# last line displays the list as the cell output.
class_names = dataset.class_names
class_names

In [None]:
# Number of batches in the dataset (it was created with batch_size=BATCH_SIZE).
len(dataset)

**Preview batches**

In [None]:
# Preview one batch: print the image tensor's shape and the integer labels
first_batch = dataset.take(1)
for image_batch, labels_batch in first_batch:
    batch_shape = image_batch.shape
    batch_labels = labels_batch.numpy()
    print(batch_shape)
    print(batch_labels)

In [None]:
# Print the first image of one batch as a tensor
for image_batch, labels_batch in dataset.take(1):
    first_image = image_batch[0]
    print(first_image)

In [None]:
# Print the first image of one batch converted to a numpy array
for image_batch, labels_batch in dataset.take(1):
    first_image = image_batch[0].numpy()
    print(first_image)

In [None]:
#Visualize images
# Show up to 16 images from one batch in a 4x4 grid, each titled with its class name.
fig, axes = plt.subplots(4, 4, figsize=(15, 15))
for image_batch, labels_batch in dataset.take(1):
    # A batch can hold fewer images than the grid has cells.
    n_shown = min(axes.size, len(image_batch))
    for i, ax in enumerate(axes.flat):
        ax.axis("off")  # pixel ticks add nothing to an image thumbnail
        if i >= n_shown:
            continue
        # Pixel values are floats in the 0-255 range; cast so imshow renders them as RGB bytes.
        ax.imshow(image_batch[i].numpy().astype("uint8"))
        # Convert the scalar label tensor to a plain int before indexing the list.
        ax.set_title(class_names[int(labels_batch[i])])


# **Split Dataset into train, validation and test**

In [None]:
# Take the first 80% of the batches as the training dataset and show its size
train_batches = int(len(dataset) * 0.8)
train_ds = dataset.take(train_batches)
len(train_ds)

In [None]:
# Everything after the first 80% of the batches is the non-train remainder
train_batches = int(len(dataset) * 0.8)
rest_ds = dataset.skip(train_batches)
len(rest_ds)

In [None]:
# The first half of the remainder becomes the validation dataset
val_batches = int(len(rest_ds) * 0.5)
val_ds = rest_ds.take(val_batches)
len(val_ds)

In [None]:
# The second half of the remainder becomes the test dataset.
# skip() already yields everything after the validation batches, so the former
# trailing .take(len(rest_ds)) could never drop anything and is removed.
test_ds = rest_ds.skip(int(len(rest_ds) * 0.5))
len(test_ds)

In [None]:
#define train, validation, test dataset size using function
def get_dataset_partitions_tf(ds, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    """Split a dataset into train, validation and test partitions.

    Args:
        ds: Dataset to split. It must support ``len()``, ``shuffle()``,
            ``take()`` and ``skip()``.
        train_split: Fraction of the elements assigned to the train partition.
        val_split: Fraction of the elements assigned to the validation partition.
        test_split: Fraction assigned to the test partition. It is only used to
            validate the fractions; the test partition receives whatever remains
            after the train and validation elements.
        shuffle: Whether to shuffle ``ds`` once before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple.

    Raises:
        AssertionError: If the three fractions do not sum to 1.
    """
    # Compare with a tolerance: binary floats make valid inputs such as
    # 0.7 + 0.1 + 0.2 evaluate to 0.9999999999999999 rather than exactly 1.
    assert math.isclose(train_split + test_split + val_split, 1.0), \
        "train_split, val_split and test_split must sum to 1"

    ds_size = len(ds)

    if shuffle:
        # Keep one fixed order for every pass. With per-iteration reshuffling,
        # take()/skip() would cut a differently ordered stream each time the
        # partitions are iterated, so train/val/test elements could overlap.
        ds = ds.shuffle(shuffle_size, seed=200, reshuffle_each_iteration=False)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    # Everything left after the train and validation elements.
    test_ds = ds.skip(train_size).skip(val_size)

    return train_ds, val_ds, test_ds

In [None]:
# Split the dataset with the helper's default fractions (0.8 / 0.1 / 0.1);
# this reassigns the train_ds, val_ds and test_ds built manually above.
train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)

In [None]:
# Number of batches in the training partition
len(train_ds)

In [None]:
# Number of batches in the validation partition
len(val_ds)

In [None]:
# Number of batches in the test partition
len(test_ds)

Cache, shuffle, and prefetch the train, validation and test datasets.

In [None]:
# Apply the same cache -> shuffle(1000) -> prefetch chain to all three partitions
train_ds, val_ds, test_ds = (
    split.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
)

In [None]:
# Layer stack that resizes images to IMG_SIZE x IMG_SIZE and scales pixel
# values by 1/255. The layers are taken from tf.keras.layers directly; the
# layers.experimental.preprocessing namespace is deprecated and no longer
# available in current Keras releases.
resize_and_rescale = tf.keras.Sequential([
  layers.Resizing(IMG_SIZE, IMG_SIZE),
  layers.Rescaling(1./255),
])

In [None]:
# Data augmentation: random horizontal/vertical flips and random rotations
# (factor 0.2). The layers are taken from tf.keras.layers directly; the
# layers.experimental.preprocessing namespace is deprecated and no longer
# available in current Keras releases.
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.2),
])

In [None]:
# Apply the augmentation layers to the training images only; labels pass through unchanged
def _augment(images, labels):
    """Return the augmented image batch together with its untouched labels."""
    return data_augmentation(images, training=True), labels

train_ds = train_ds.map(_augment).prefetch(buffer_size=tf.data.AUTOTUNE)


## **Model Architecture**

In [None]:
# Shape the model is built for. The batch dimension is None so the model is not
# tied to BATCH_SIZE.
input_shape = (None, IMG_SIZE, IMG_SIZE, CHANNELS)
n_classes = 2

# CNN: preprocessing, four Conv2D + MaxPooling2D stages, then a dense head.
model = models.Sequential([
    # Resizes to IMG_SIZE x IMG_SIZE and rescales pixel values by 1/255.
    resize_and_rescale,
    # No input_shape argument here: this is not the first layer, so the value
    # was ignored, and a per-layer input shape must not include the batch size.
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # NOTE(review): two sigmoid units score each class independently. If the
    # compile step (not shown here) uses a sparse categorical cross-entropy
    # loss, softmax is presumably the intended activation - confirm.
    layers.Dense(n_classes, activation='sigmoid'),
])

# Create the weights up front so the model can be inspected before training.
model.build(input_shape=input_shape)