In [None]:
import tensorflow as tf
import os
import random
from io import BytesIO
from PIL import Image
import numpy as np
from google.cloud import storage
import pandas as pd


class StorageImageDataGenerator(tf.keras.utils.Sequence):
    """Custom data generator to fetch batches from Cloud Storage.

    This generator is really inefficient, calls to Cloud Storage should be asynchronous
    and could benefit from multithreading. For example purpose only.

    For a given project, you can have different versions of a same dataset,
    e.g. full res images and downscaled images, raw images and processed images, etc.
    It is generally a good idea to treat these versions as different datasets.

    Blobs are expected under ``<dataset>/<fold>/images`` in the bucket, and each
    mask path is derived from its image path by replacing "images" with "masks".

    Args:
        bucket_name: Name of the (public) Cloud Storage bucket to read from.
        dataset: Top-level prefix selecting the dataset version.
        fold: Sub-prefix selecting the split (train, validation, test).
        batch_size: Number of samples per batch.
        shuffle: When True, the sample order is reshuffled every time
            ``on_epoch_end`` runs (including once at construction). Defaults to
            False, which keeps the listing order returned by Cloud Storage.
    """
    def __init__(self, bucket_name, dataset="half_res", fold="train", batch_size=32, shuffle=False):
        # Let the Keras base class set up whatever internal state it needs.
        super().__init__()

        self.bucket_name = bucket_name
        self.dataset = dataset
        self.fold = fold # train, validation, test
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Anonymous client is required to query data from public buckets
        self.client = storage.Client.create_anonymous_client()

        self.df = self.make_df() # Simple dataframe to map input images and output masks
        self.indices = self.df.index.to_list() # Dataframe indices

        # Callback function
        self.on_epoch_end()

    def read_img(self, img_path):
        """Utility function to load images from Cloud Storage.

        Downloads the blob at ``img_path`` fully into memory and returns it as a
        PIL image.
        """
        blob = self.client.bucket(self.bucket_name).blob(img_path)
        return Image.open(BytesIO(blob.download_as_bytes()))

    def make_df(self):
        """Map each image filepath with the corresponding mask filepath.

        Returns:
            A DataFrame with one row per listed image blob: column "X" holds the
            image path and column "y" the derived mask path.
        """

        # It assumes images and masks have the same name.
        images = list(
            self.client.list_blobs(
                bucket_or_name=self.bucket_name,
                prefix=f"{self.dataset}/{self.fold}/images"
            )
        )
        return pd.DataFrame({
            "X": [img.name for img in images],
            # Note: replaces every occurrence of "images" in the path, not only the folder name.
            "y": [img.name.replace("images", "masks") for img in images]
        })

    def __len__(self):
        """Number of batches generated per epoch.

        Uses floor division, so a trailing partial batch is dropped and a fold
        with fewer than ``batch_size`` samples yields zero batches.
        """
        return len(self.df) // self.batch_size

    def __getitem__(self, index):
        """Return the i-th batch as a tuple (batch_X, batch_y).

        This is where you define the batch generation logic. This can be as custom
        as you want. You can scale and transform your data here if needed, load your
        data from anywhere, into any shape.
        """
        start = index * self.batch_size
        batch = self.indices[start:start + self.batch_size]
        rows = self.df.iloc[batch]
        return self.__get_X(rows["X"]), self.__get_y(rows["y"])

    def _load_batch(self, paths):
        """Download every path, scale pixel values by 1/255 and stack the results.

        Each PIL image converts to an array laid out as (height, width[, channels]),
        so the stacked result is (batch_size, height, width[, channels]).
        Assumes all images in the batch share one size and mode; otherwise NumPy
        cannot stack them into a regular array.
        """
        return np.array([np.asarray(self.read_img(path)) / 255.0 for path in paths])

    def __get_X(self, X):
        """Read the batch of image filepaths into a numpy array. (batch_size, height, width, nb_channels)"""
        return self._load_batch(X)

    def __get_y(self, y):
        """Read the batch of mask filepaths into a numpy array. (batch_size, height, width[, nb_channels])"""
        return self._load_batch(y)

    def on_epoch_end(self):
        """Reshuffle the sample order at the end of an epoch when ``shuffle`` is enabled."""
        if self.shuffle:
            random.shuffle(self.indices)

# One generator per fold, both reading from the same public bucket
# (train is built first, then test).
train_gen, test_gen = [
    StorageImageDataGenerator(bucket_name="breast-ultrasound-images", fold=fold_name)
    for fold_name in ("train", "test")
]


# Model

In [None]:
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

def unet_model(input_size=(128, 128, 3)):
    """Assemble a three-level U-Net style encoder/decoder.

    The encoder runs three stages (64, 128, 256 filters), each made of two 3x3
    same-padded ReLU convolutions followed by 2x2 max pooling. A 512-filter
    stage sits at the bottom. The decoder mirrors the encoder: 2x2 upsampling,
    concatenation with the matching encoder output, then two convolutions.
    A final 1x1 sigmoid convolution produces a single output channel.

    Args:
        input_size: Shape of one input sample, passed to ``tf.keras.Input``.
            The spatial dimensions should be divisible by 8 so that the
            upsampled tensors line up with the skip connections.

    Returns:
        The uncompiled Keras ``Model``.
    """
    def double_conv(tensor, filters):
        """Apply two successive 3x3 same-padded ReLU convolutions."""
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return tensor

    inputs = tf.keras.Input(shape=input_size)

    # Encoder: remember each stage's output before pooling for the skip connections
    skips = []
    x = inputs
    for filters in (64, 128, 256):
        x = double_conv(x, filters)
        skips.append(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Bottom
    x = double_conv(x, 512)

    # Decoder: walk the skip connections from deepest to shallowest
    for filters, skip in zip((256, 128, 64), reversed(skips)):
        upsampled = layers.UpSampling2D(size=(2, 2))(x)
        merged = layers.concatenate([skip, upsampled], axis=-1)
        x = double_conv(merged, filters)

    # Adjust the number of output channels based on your task
    outputs = layers.Conv2D(1, 1, activation='sigmoid')(x)

    return Model(inputs=inputs, outputs=outputs)

# Build the U-Net for 128x128 three-channel inputs, then compile it with
# a mean-squared-error loss and Adam at a small learning rate.
model = unet_model(input_size=(128, 128, 3))
model.compile(
    optimizer=Adam(learning_rate=5e-5),
    loss="mean_squared_error",
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 128, 128, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 128, 128, 64)         1792      ['input_1[0][0]']             
                                                                                                  
 conv2d_1 (Conv2D)           (None, 128, 128, 64)         36928     ['conv2d[0][0]']              
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 64, 64, 64)           0         ['conv2d_1[0][0]']            
 D)                                                                                           

In [None]:
# Train on the train fold and validate on the test fold after each epoch.
# NOTE(review): the saved cell output lists 5 epochs while this call requests 1;
# the output is presumably stale -- confirm the intended epoch count.
history = model.fit(train_gen, validation_data=test_gen, epochs=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
