In [1]:
import os
import pathlib
import numpy as np
import tensorflow as tf

In [2]:
# Root directory of the dataset; images are expected at <data_dir>/<subdir>/<name>.jpg.
# Set the DATASET_DIR environment variable to run on another machine; when it
# is unset the original location is used.
data_dir = pathlib.Path(
    os.environ.get('DATASET_DIR', '/Users/filippouslenghi/msa/dataset')
)
# Collect the path of every image, sorted so the listing does not depend on
# the order in which the filesystem enumerates entries
data_paths = sorted(str(path) for path in data_dir.glob("*/*.jpg"))
print(f"Images in the dataset: {len(data_paths)}")

Images in the dataset: 24951


In [3]:
# Create the respective tf.data.Dataset object
dataset = tf.data.Dataset.from_tensor_slices(data_paths)
# Shuffle once with a buffer as large as the dataset.
# reshuffle_each_iteration=False (together with a fixed seed) makes every pass
# over `dataset` yield the same order. With the default behaviour the dataset
# is reshuffled each time it is iterated, so subsets carved out of it with
# skip()/take() would be drawn from different permutations and overlap.
dataset = dataset.shuffle(
    len(data_paths),
    seed=42,
    reshuffle_each_iteration=False,
)

Metal device set to: Apple M1 Pro


2022-07-05 15:59:50.688960: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-05 15:59:50.689054: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
# Get the class names: one class per non-hidden sub-directory of the dataset.
# Plain files and hidden entries (e.g. .DS_Store, .ipynb_checkpoints) are
# skipped; otherwise they would show up as extra classes and shift the
# integer labels derived from the position in this array.
class_names = np.array(sorted(
    item.name
    for item in data_dir.glob('*')
    if item.is_dir() and not item.name.startswith('.')
))
print(class_names)

['Cats' 'Dogs']


In [5]:
# Create a validation set: hold out 20% of the files.
# `dataset` has exactly one element per entry of data_paths, so the size is
# taken from the list instead of iterating the whole dataset just to count it.
# NOTE: skip()/take() only produce disjoint subsets if `dataset` yields the
# same order every time it is iterated.
val_size = int(len(data_paths) * 0.2)
train = dataset.skip(val_size)
val = dataset.take(val_size)

In [6]:
# Loader settings: mini-batch size and the (height, width) images are resized to
batch_size = 64
img_height, img_width = 150, 150

In [7]:
def get_label(file_path):
    """Return the integer class index encoded in an image path.

    The class is the name of the directory that directly contains the file;
    it is looked up in the module-level `class_names` array.
    """
    # Split the path into components; the parent directory is second to last
    class_dir = tf.strings.split(file_path, os.path.sep)[-2]
    # Boolean mask over class_names, true where the directory name matches
    matches = class_dir == class_names
    # The position of the match is the integer label
    return tf.argmax(matches)

In [8]:
def decode_img(img):
    """Decode raw image bytes and resize them to (img_height, img_width).

    Args:
        img: scalar string tensor holding the encoded contents of an image file.

    Returns:
        The decoded 3-channel image resized to [img_height, img_width]
        (tf.image.resize returns float32 for its default bilinear method).
    """
    # decode_image detects the actual format (JPEG, PNG, BMP, GIF) from the
    # bytes, so mislabelled files are handled. A Python try/except around
    # decode_jpeg cannot provide that fallback when this function is traced by
    # Dataset.map: decoding errors are raised when the pipeline runs, not here.
    # expand_animations=False keeps the result 3-D (GIFs use the first frame).
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    # Make the static shape explicit so resize sees a rank-3 tensor
    img.set_shape([None, None, 3])
    # Resize the image to the desired size
    return tf.image.resize(img, [img_height, img_width])

In [9]:
def process_path(file_path):
    """Map an image file path to an (image, label) pair."""
    # The label is derived from the path itself
    label = get_label(file_path)
    # Read the raw file contents and hand them straight to the decoder
    image = decode_img(tf.io.read_file(file_path))
    return image, label

In [10]:
# Turn both splits into datasets of (image, label) pairs;
# AUTOTUNE lets tf.data pick the level of parallelism for the mapping
train, val = (
    split.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    for split in (train, val)
)

In [11]:
# Configure dataset for performance
def configure_for_performance(ds, shuffle=True, shuffle_buffer=1000):
    """Cache, optionally shuffle, batch and prefetch a dataset.

    Args:
        ds: dataset of (image, label) pairs.
        shuffle: whether to shuffle the cached elements on each pass.
            Defaults to True, which matches the previous behaviour.
        shuffle_buffer: size of the shuffle buffer (default 1000).

    Returns:
        The dataset batched with the module-level `batch_size` and prefetched.
    """
    # Cache first so the decoded images are reused by later epochs
    ds = ds.cache()
    if shuffle:
        ds = ds.shuffle(buffer_size=shuffle_buffer)
    ds = ds.batch(batch_size)
    # Let tf.data prepare upcoming batches while the current one is consumed
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

train = configure_for_performance(train)
# Validation loss/accuracy do not depend on element order, so skip the shuffle
val = configure_for_performance(val, shuffle=False)

In [12]:
num_classes = 2

# Small CNN assembled layer by layer: input scaling, three conv/pool stages,
# then a dense classification head
model = tf.keras.Sequential()
# Multiply the inputs by 1/255
model.add(tf.keras.layers.Rescaling(1./255))
# Stage 1
model.add(tf.keras.layers.Conv2D(32, 3, activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
# Stage 2
model.add(tf.keras.layers.Conv2D(32, 3, activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
# Stage 3
model.add(tf.keras.layers.Conv2D(32, 3, activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
# Head: flatten, one hidden layer, then one output unit per class with no
# activation
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(num_classes))

In [13]:
# The loss is built with from_logits=True, i.e. it expects un-normalised
# scores from the model rather than probabilities
loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [14]:
# Train for 5 epochs, evaluating on the validation split after each one.
# The returned History object is kept so the per-epoch metrics stay available
# (history.history) instead of being discarded after the cell runs.
history = model.fit(
    train,
    validation_data=val,
    epochs=5
)

Epoch 1/5


2022-07-05 16:00:31.178514: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-05 16:00:31.178653: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


  7/312 [..............................] - ETA: 17s - loss: 0.7942 - accuracy: 0.4866



 21/312 [=>............................] - ETA: 15s - loss: 0.7276 - accuracy: 0.5060

Corrupt JPEG data: 99 extraneous bytes before marker 0xd9




Corrupt JPEG data: 239 extraneous bytes before marker 0xd9
Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9




Corrupt JPEG data: 65 extraneous bytes before marker 0xd9




Corrupt JPEG data: 228 extraneous bytes before marker 0xd9




Corrupt JPEG data: 214 extraneous bytes before marker 0xd9




Corrupt JPEG data: 252 extraneous bytes before marker 0xd9




Corrupt JPEG data: 396 extraneous bytes before marker 0xd9




Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9




Corrupt JPEG data: 162 extraneous bytes before marker 0xd9




2022-07-05 16:00:48.375507: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 128 extraneous bytes before marker 0xd9
Corrupt JPEG data: 65 extraneous bytes before marker 0xd9


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1602f9130>