# Importing necessary libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from helper_functions import *

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Activation, Dense, GlobalAveragePooling2D
from tensorflow.keras import mixed_precision
import tensorflow_datasets as tfds

# Importing Food101 dataset from TensorFlow Datasets

In [None]:
# Load Food101 as (image, label) pairs together with its metadata.
# The dataset's "validation" split is used as the test set in this notebook.
food101_splits, ds_info = tfds.load(
    "food101",
    split=["train", "validation"],
    shuffle_files=False,
    as_supervised=True,
    with_info=True,
)
train_data, test_data = food101_splits

Downloading and preparing dataset 4.65 GiB (download: 4.65 GiB, generated: Unknown size, total: 4.65 GiB) to /root/tensorflow_datasets/food101/2.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

# Investigate our Food101 dataset
* What are the input data shape, dtype, and min and max pixel values?
* Are the images of a consistent size?
* Are they scaled?
* Are the data in batches?
* What do the labels look like? (Are they one-hot or label encoded?)
* What are the class names?
* Do labels match the class names?

In [None]:
# Display the feature description stored on the info object returned by tfds.load.
ds_info.features

In [None]:
# Pull the human-readable class names off the dataset's label feature;
# an integer label is used as an index into this list.
label_feature = ds_info.features["label"]
class_names = label_feature.names
class_names

In [None]:
# Inspect three raw training samples. `image` and `label` intentionally stay
# bound to the last sample after the loop, because later cells reuse them.
for image, label in train_data.take(3):
  pixel_min = tf.reduce_min(image)
  pixel_max = tf.reduce_max(image)
  label_name = class_names[label.numpy()]  # integer label -> class name
  print(f"""
  Image shape: {image.shape}
  Image dtype: {image.dtype}
  Image pixel value, min: {pixel_min}, max: {pixel_max}
  Label dtype: {label.dtype}
  Label value: {label}
  Label value (str format): {label_name}
  """)

In [None]:
# Display the image tensor left bound by the last iteration of the loop over train_data.take(3).
image

## Conclusions drawn
* Image sizes are not consistent, so we must resize all image tensors to a common size
* Image tensors are in the wrong datatype (typically should be `tf.float32`)
* Images are not scaled (pixel values are not normalized to between 0 & 1), but this is not an issue because the Keras `EfficientNetB0` we are going to use as our backbone has a rescaling layer built in and expects raw pixel values
* Labels are label encoded (during compilation we must use `SparseCategoricalCrossentropy`)

## Let's visualize an image

In [None]:
# Render the last inspected sample, titled with its class name.
fig, ax = plt.subplots()
ax.imshow(image)
ax.axis("off")
ax.set_title(class_names[label.numpy()]);

# Constructing an input pipeline

In [None]:
# Settings for the input pipelines below.
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

# Training split: preprocess in parallel, shuffle, batch, then prefetch.
train_data = (
    train_data
    .map(image_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Test split: same preprocessing and batching, but no shuffling.
test_data = (
    test_data
    .map(image_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [10]:
# Display both pipeline datasets so their element specs (shapes and dtypes) can be checked.
train_data, test_data

(<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>,
 <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>)