# What kind of doggo is that? Classifying Dog Breeds using Convolutional Neural Networks

## Task 4: Model Pipeline

In this task, we will show how to build an efficient model input pipeline based on our preprocessing steps from the previous task.

In [17]:
import os
import glob
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [18]:
filenames = glob.glob('Data/*/*.jpg')

In [19]:
labels = [x.split('/')[1] for x in filenames]

In [20]:
breeds = np.unique(labels)
breeds_to_label = {breed: ix for ix, breed in enumerate(breeds)}
breeds_to_label

{'american_bulldog': 0,
 'american_pit_bull_terrier': 1,
 'beagle': 2,
 'boxer': 3,
 'english_cocker_spaniel': 4,
 'saint_bernard': 5,
 'samoyed': 6,
 'scottish_terrier': 7,
 'shiba_inu': 8,
 'staffordshire_bull_terrier': 9}

In [21]:
filenames_train, filenames_val, y_train, y_val = train_test_split(filenames, labels, stratify=labels, random_state=8)

In [22]:
# build a lookup table
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(breeds.tolist()),
        values=tf.constant(np.arange(len(breeds)).tolist()),
    ),
    default_value=tf.constant(-1),
    name="class_weight"
)

# now let us do a lookup
input_tensor = tf.constant(['beagle'], dtype=tf.string)
table.lookup(input_tensor)

<tf.Tensor: shape=(1,), dtype=int32, numpy=array([2], dtype=int32)>

In [23]:
def reshape_rescale_image(image, height=100, width=100):
    image = tf.cast(image, tf.float32)/255.
    image = tf.image.resize_with_pad(image, target_height=height, target_width=width)
    return image

def parse_image(file_path):
    label = tf.strings.split(file_path, os.sep)[-2]
    image = tf.io.decode_image(tf.io.read_file(file_path), channels=3)
    image.set_shape([None, None, 3])
    return reshape_rescale_image(image), table.lookup(label)

In [24]:
dataset_train = tf.data.Dataset.list_files(filenames_train)
dataset_train = dataset_train.map(parse_image).shuffle(8).batch(64).prefetch(buffer_size=400).cache()

dataset_val = tf.data.Dataset.list_files(filenames_val)
dataset_val = dataset_val.map(parse_image).shuffle(8).batch(64).prefetch(buffer_size=100).cache()

In [25]:
num_classes = len(breeds)

model = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomContrast(0.2),
    tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
    tf.keras.layers.Dropout(0.3),
])

In [26]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [27]:
model(sample)

NameError: name 'sample' is not defined