This notebook builds a TensorFlow `tf.data.Dataset` from images on disk and uses it to train a ResNet50-based image classifier.

In [1]:
import os
import tensorflow as tf
tf.enable_eager_execution()
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Parameters
# Settings shared by the input pipeline and the model defined below.

# --- data locations ---
img_dir = "train_images/"             # prefix prepended to "<id_code>.png"
label_file = "train.csv"              # CSV file holding the image IDs and labels

# --- preprocessing / batching ---
downsampled_image_size = (224, 224)   # size every image is resized to
batchsize = 64                        # number of examples per batch

# --- model ---
num_labels = 5                        # number of output units of the classifier

In [3]:
def load_and_preprocess_image(img_url, label):
    """
    Read one PNG file and convert it into a fixed-size float image.

    Args:
        img_url: path of the PNG file to read.
        label: label belonging to the image; returned unchanged.

    Returns:
        A tuple ``(img, label)`` where ``img`` is a float32 tensor with
        spatial size ``downsampled_image_size`` and 3 channels, min-max
        scaled so its values lie in [0, 1].
    """
    raw_bytes = tf.read_file(img_url)
    # Force 3 channels: without `channels`, decode_png keeps whatever the file
    # contains (grayscale, RGBA, ...), which would not match the 3-channel
    # input shape the model is built with.
    img = tf.image.decode_png(raw_bytes, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize_images(img, downsampled_image_size)
    min_val = tf.reduce_min(img)
    max_val = tf.reduce_max(img)
    # A constant image has max == min; clamp the denominator so the result is
    # all zeros instead of NaN. For any non-constant 8-bit image the range is
    # far larger than the clamp, so normal results are unaffected.
    value_range = tf.maximum(max_val - min_val, 1e-8)
    img = (img - min_val) / value_range
    return img, label

In [4]:
def make_dataset():
    """
    Load labels and image IDs and create a tf.data.Dataset object.

    Reads ``label_file`` (expects the columns ``id_code`` and ``diagnosis``),
    derives each image path as ``<img_dir>/<id_code>.png`` and returns a
    dataset that is shuffled, preprocessed with
    ``load_and_preprocess_image``, batched into ``batchsize`` examples and
    prefetched.

    Returns:
        tf.data.Dataset yielding ``(image_batch, label_batch)`` tuples.

    Raises:
        ValueError: if ``label_file`` contains no rows.
    """
    # Use the configured path rather than a second hard-coded copy of it.
    df = pd.read_csv(label_file)
    if df.empty:
        raise ValueError("No rows found in label file: {}".format(label_file))

    # os.path.join(img_dir, "") guarantees exactly one trailing separator, so
    # the paths are right whether or not img_dir ends with a slash.
    image_paths = os.path.join(img_dir, "") + df["id_code"] + ".png"

    dataset = tf.data.Dataset.from_tensor_slices(
        (image_paths.values, df["diagnosis"].values))
    # Shuffle the cheap (path, label) pairs over the whole table before any
    # image is decoded.
    dataset = dataset.shuffle(buffer_size=len(df))
    # AUTOTUNE is the named constant for the former magic value -1.
    dataset = dataset.map(
        load_and_preprocess_image,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batchsize)
    dataset = dataset.prefetch(1)
    return dataset

In [5]:
# Build the shuffled, batched input pipeline that is passed to model.fit below.
dataset = make_dataset()

In [15]:
def make_model():
    """
    Build and compile a classifier on top of a frozen ResNet50 backbone.

    The backbone is created without its classification head and marked
    non-trainable, so only the final Dense layer is fitted. Its output is
    global-average-pooled and fed to a Dense layer with ``num_labels``
    softmax units.

    Returns:
        A compiled tf.keras.Sequential model that expects integer class
        labels (sparse categorical cross-entropy) and reports accuracy.
    """
    # NOTE(review): the backbone uses the Keras default weights; the input
    # pipeline in this notebook min-max scales images to [0, 1] instead of
    # applying tf.keras.applications.resnet50.preprocess_input. Confirm this
    # mismatch is intended.
    backbone = tf.keras.applications.ResNet50(
        # tuple(...) keeps this working if the size is configured as a list.
        input_shape=tuple(downsampled_image_size) + (3,),
        include_top=False)
    backbone.trainable = False

    model = tf.keras.Sequential([
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        # sparse_categorical_crossentropy assumes probabilities by default
        # (from_logits=False), so the head must emit a softmax distribution
        # rather than raw logits.
        tf.keras.layers.Dense(num_labels, activation="softmax")])
    model.compile(optimizer="adam",
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=["accuracy"])

    return model

In [16]:
# Instantiate the compiled classifier returned by make_model().
model = make_model()

In [17]:
# Print each layer's output shape and parameter count, plus trainable totals.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 7, 7, 2048)        23587712  
_________________________________________________________________
global_average_pooling2d_2 ( (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 10245     
Total params: 23,597,957
Trainable params: 10,245
Non-trainable params: 23,587,712
_________________________________________________________________


In [None]:
# Train for a single pass (one epoch) over the batches produced by `dataset`.
model.fit(dataset, epochs=1)

In [None]:
# Write the model to the file "Model.h5" (path is relative to the working directory).
model.save("Model.h5")

In [None]:
# TODO: build the test-set input pipeline (draft below, currently disabled).
# df = pd.read_csv("test.csv")
# df.loc[:, "url"] = img_dir + df.loc[:, "id_code"] + ".png"
# dataset = tf.data.Dataset.from_tensor_slices((df["url"], df["diagnosis"]))