In [1]:
import tensorflow as tf
import IPython.display as display
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import pathlib

In [14]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
FOLDER_NAME = "natural_images"
data_dir = pathlib.Path(str(FOLDER_NAME))
image_count = len(list(data_dir.glob('*/*.jpg')))
print(image_count)
CLASS_NAMES = np.array([item.name for item in data_dir.glob('*') if item.name != "LICENSE.txt"])
print(CLASS_NAMES)

BATCH_SIZE = 1000
IMG_HEIGHT = 128
IMG_WIDTH = 128
BUFFER_SIZE = image_count
TRAIN_SIZE = int(image_count * 0.7)
# is the next line necessary?
STEPS_PER_EPOCH = np.ceil(image_count/BATCH_SIZE)

"""
#slow method

#rescale maps to a range between 0 and 1 | validation_split determines the size of the validation set
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, validation_split=0.25)
#generating the training and test set with the argument "subset", 
#requires validation_split to be defined in the ImageDataGenerator
train_data_gen = image_generator.flow_from_directory(directory=str(data_dir),
                                                     batch_size=BATCH_SIZE,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES),
                                                     subset='training')
test_data_gen = image_generator.flow_from_directory(directory=str(data_dir),
                                                     batch_size=BATCH_SIZE,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES),
                                                     subset='validation')
def show_batch(image_batch, label_batch):
    plt.figure(figsize=(10,10))
    for n in range(25):
        ax = plt.subplot(5,5,n+1)
        plt.imshow(image_batch[n])
        plt.title(CLASS_NAMES[label_batch[n]==1][0].title())
        plt.axis('off')
        

train_image_batch, train_label_batch = next(train_data_gen)
test_image_batch, test_label_batch = next(test_data_gen)
print(train_image_batch.shape)
#show_batch(image_batch, label_batch)
"""

6899
['7' '5' '0' '2' '4' '3' '1' '6']


'\n#slow method\n\n#rescale maps to a range between 0 and 1 | validation_split determines the size of the validation set\nimage_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, validation_split=0.25)\n#generating the training and test set with the argument "subset", \n#requires validation_split to be defined in the ImageDataGenerator\ntrain_data_gen = image_generator.flow_from_directory(directory=str(data_dir),\n                                                     batch_size=BATCH_SIZE,\n                                                     shuffle=True,\n                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),\n                                                     classes = list(CLASS_NAMES),\n                                                     subset=\'training\')\ntest_data_gen = image_generator.flow_from_directory(directory=str(data_dir),\n                                                     batch_size=BATCH_SIZE,\n        

In [15]:
#better method

list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*')).shuffle(BUFFER_SIZE)
train_list_ds = list_ds.take(TRAIN_SIZE)
test_list_ds = list_ds.skip(TRAIN_SIZE)

def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second to last is the class-directory
#    return parts[-2] == CLASS_NAMES
    # return the class directory (second to last element in the path) as an integer
    return int(parts[-2])
def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize the image to the desired size.
    return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])
def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label

# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
train_labeled_ds = train_list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
test_labeled_ds = train_list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
#for image, label in labeled_ds.take(1):
#    print("Image shape: ", image.numpy().shape)
#    print("Label: ", label.numpy())
    
def prepare_for_training(ds, cache=True, shuffle_buffer_size=BUFFER_SIZE):
    # This is a small dataset, only load it once, and keep it in memory.
    # use `.cache(filename)` to cache preprocessing work for datasets that don't
    # fit in memory.
    if cache:
        if isinstance(cache, str):
            ds = ds.cache(cache)
    else:
        ds = ds.cache()

    ds = ds.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat forever
    ds = ds.repeat()
    ds = ds.batch(BATCH_SIZE)

    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)

    return ds

In [16]:
train_ds = prepare_for_training(train_labeled_ds)
test_ds = prepare_for_training(test_labeled_ds)

train_image_batch, train_label_batch = next(iter(train_ds))
test_image_batch, test_label_batch = next(iter(test_ds))

In [17]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(8, activation='softmax')
])

model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

model.fit(train_image_batch, train_label_batch, epochs=10)
print()
test_loss, test_acc = model.evaluate(test_image_batch,  test_label_batch, verbose=2)
print('\nTest accuracy:', test_acc)



Train on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

1000/1000 - 0s - loss: 4.1384 - sparse_categorical_accuracy: 0.4660

Test accuracy: 0.466
