In [1]:
# Imports needed

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from google.colab import drive 
import random

In [None]:
img_height = 28
img_width = 28
batch_size = 2

model = keras.Sequential(
    [
        layers.InputLayer((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)


In [2]:
random.seed(42)
gdrive_path = '/content/gdrive'
drive.mount(gdrive_path)

Mounted at /content/gdrive


In [3]:
dataset_path = '/content/gdrive/My Drive/Colab Notebooks/CV/Dataset_Transferlearning/'

### Method 1

Using dataset_from_directory

In [None]:
ds_train = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_path+"data/mnist_subfolders/",
    labels="inferred",
    label_mode="int",  # categorical, binary
    # class_names=['0', '1', '2', '3', ...]
    color_mode="grayscale",
    batch_size=batch_size,
    image_size=(img_height, img_width),  # reshape if not in this size
    shuffle=True,
    seed=123,
    validation_split=0.1,
    subset="training",
)

ds_validation = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_path+"data/mnist_subfolders/",
    labels="inferred",
    label_mode="int",  # categorical, binary
    # class_names=['0', '1', '2', '3', ...]
    color_mode="grayscale",
    batch_size=batch_size,
    image_size=(img_height, img_width),  # reshape if not in this size
    shuffle=True,
    seed=123,
    validation_split=0.1,
    subset="validation",
)


def augment(x, y):
    image = tf.image.random_brightness(x, max_delta=0.05)
    return image, y


ds_train = ds_train.map(augment)

# Custom Loops
for epochs in range(10):
    for x, y in ds_train:
        # train here
        pass


model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=10, verbose=2)

Found 50 files belonging to 10 classes.
Using 45 files for training.
Found 50 files belonging to 10 classes.
Using 5 files for validation.
Epoch 1/10
23/23 - 1s - loss: 66.3582 - accuracy: 0.2222
Epoch 2/10
23/23 - 0s - loss: 10.7957 - accuracy: 0.7333
Epoch 3/10
23/23 - 0s - loss: 2.3985 - accuracy: 0.8889
Epoch 4/10
23/23 - 0s - loss: 0.3245 - accuracy: 0.9333
Epoch 5/10
23/23 - 0s - loss: 0.0195 - accuracy: 0.9778
Epoch 6/10
23/23 - 0s - loss: 0.0036 - accuracy: 1.0000
Epoch 7/10
23/23 - 0s - loss: 2.9259e-04 - accuracy: 1.0000
Epoch 8/10
23/23 - 0s - loss: 1.7151e-05 - accuracy: 1.0000
Epoch 9/10
23/23 - 0s - loss: 1.1876e-05 - accuracy: 1.0000
Epoch 10/10
23/23 - 0s - loss: 1.0330e-05 - accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x7f949f833e90>

### Method 2

ImageDataGenerator and flow_from_directory

In [None]:
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=5,
    zoom_range=(0.95, 0.95),
    horizontal_flip=False,
    vertical_flip=False,
    data_format="channels_last",
    validation_split=0.0,
    dtype=tf.float32,
)

train_generator = datagen.flow_from_directory(
    dataset_path+"data/mnist_subfolders/",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    color_mode="grayscale",
    class_mode="sparse",
    shuffle=True,
    subset="training",
    seed=123,
)


def training():
    pass


# Custom Loops
for epoch in range(10):
    num_batches = 0

    for x, y in ds_train:
        num_batches += 1

        # do training
        training()

        if num_batches == 25:  # len(train_dataset)/batch_size
            break

# Redo model.compile to reset the optimizer states
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)

# using model.fit (note steps_per_epoch)
model.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=25,
    verbose=2,
    # if we had a validation generator:
    # validation_data=validation_generator,
    # valiation_steps=len(validation_set)/batch_size),
)

Found 50 images belonging to 10 classes.
Epoch 1/10
25/25 - 1s - loss: 1.6938 - accuracy: 0.5400
Epoch 2/10
25/25 - 0s - loss: 0.5089 - accuracy: 0.9200
Epoch 3/10
25/25 - 0s - loss: 0.0990 - accuracy: 0.9800
Epoch 4/10
25/25 - 0s - loss: 0.0247 - accuracy: 1.0000
Epoch 5/10
25/25 - 0s - loss: 0.0105 - accuracy: 1.0000
Epoch 6/10
25/25 - 0s - loss: 0.0069 - accuracy: 1.0000
Epoch 7/10
25/25 - 0s - loss: 0.0055 - accuracy: 1.0000
Epoch 8/10
25/25 - 0s - loss: 0.0035 - accuracy: 1.0000
Epoch 9/10
25/25 - 0s - loss: 0.0036 - accuracy: 1.0000
Epoch 10/10
25/25 - 0s - loss: 0.0028 - accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x7f949ee96710>

### Method 3

From CSV file

In [None]:
import pandas as pd

directory = dataset_path + "data/mnist_images_csv/"
df = pd.read_csv(directory + "train.csv")

In [None]:
df.head()

Unnamed: 0,file_name,label
0,0_1.jpg,0
1,0_2.jpg,0
2,0_3.jpg,0
3,0_4.jpg,0
4,0_5.jpg,0


In [None]:
file_paths = df["file_name"].values
labels = df["label"].values
ds_train = tf.data.Dataset.from_tensor_slices((file_paths, labels))


def read_image(image_file, label):
    image = tf.io.read_file(directory + image_file)
    image = tf.image.decode_image(image, channels=1, dtype=tf.float32)
    return image, label


def augment(image, label):
    # data augmentation here
    return image, label


ds_train = ds_train.map(read_image).map(augment).batch(2)

for epoch in range(10):
    for x, y in ds_train:
        # train here
        pass

model = keras.Sequential(
    [
        layers.InputLayer((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=10, verbose=2)

Epoch 1/10
25/25 - 0s - loss: 2.7496 - accuracy: 0.0600
Epoch 2/10
25/25 - 0s - loss: 2.5375 - accuracy: 0.1600
Epoch 3/10
25/25 - 0s - loss: 1.1304 - accuracy: 0.9400
Epoch 4/10
25/25 - 0s - loss: 0.4346 - accuracy: 0.9600
Epoch 5/10
25/25 - 0s - loss: 0.1185 - accuracy: 1.0000
Epoch 6/10
25/25 - 0s - loss: 0.0436 - accuracy: 1.0000
Epoch 7/10
25/25 - 0s - loss: 0.0239 - accuracy: 1.0000
Epoch 8/10
25/25 - 0s - loss: 0.0159 - accuracy: 1.0000
Epoch 9/10
25/25 - 0s - loss: 0.0116 - accuracy: 1.0000
Epoch 10/10
25/25 - 0s - loss: 0.0089 - accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x7f949e359390>

### Method 4

3 single folders

In [4]:
import pathlib

In [5]:
batch_size = 2
img_height = 28
img_width = 28

In [18]:
directory = dataset_path + "data/mnist_images_only/"
ds_train = tf.data.Dataset.list_files(str(pathlib.Path(directory + "*.jpg")))

In [117]:
for filepath in ds_train:
  print(tf.strings.split(filepath, os.path.sep))

tf.Tensor(
[b'' b'content' b'gdrive' b'My Drive' b'Colab Notebooks' b'CV'
 b'Dataset_Transferlearning' b'data' b'mnist_images_only' b'7_2.jpg'], shape=(10,), dtype=string)
tf.Tensor(
[b'' b'content' b'gdrive' b'My Drive' b'Colab Notebooks' b'CV'
 b'Dataset_Transferlearning' b'data' b'mnist_images_only' b'9_3.jpg'], shape=(10,), dtype=string)
tf.Tensor(
[b'' b'content' b'gdrive' b'My Drive' b'Colab Notebooks' b'CV'
 b'Dataset_Transferlearning' b'data' b'mnist_images_only' b'7_1.jpg'], shape=(10,), dtype=string)
tf.Tensor(
[b'' b'content' b'gdrive' b'My Drive' b'Colab Notebooks' b'CV'
 b'Dataset_Transferlearning' b'data' b'mnist_images_only' b'3_4.jpg'], shape=(10,), dtype=string)
tf.Tensor(
[b'' b'content' b'gdrive' b'My Drive' b'Colab Notebooks' b'CV'
 b'Dataset_Transferlearning' b'data' b'mnist_images_only' b'2_1.jpg'], shape=(10,), dtype=string)
tf.Tensor(
[b'' b'content' b'gdrive' b'My Drive' b'Colab Notebooks' b'CV'
 b'Dataset_Transferlearning' b'data' b'mnist_images_only' b'2_5.jp

In [110]:
def get_label(file_path):
  # convert the path to a list of path components
  parts = tf.strings.split(file_path, os.path.sep)
  # The second to last is the class-directory
  label = parts[-1] 
  label = tf.strings.substr(label, pos=0, len=1)
  label = tf.strings.to_number(label, out_type=tf.int64)
  # Integer encode the label
  return label


def decode_img(img):
  # convert the compressed string to a 3D uint8 tensor
  img = tf.image.decode_jpeg(img, channels=1)
  # resize the image to the desired size
  return tf.image.resize(img, [img_height, img_width])

def process_path(file_path):
  label = get_label(file_path)
  # load the raw data from the file as a string
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label


In [111]:
ds_train1 = ds_train.map(process_path).batch(batch_size)

In [112]:
model = keras.Sequential(
    [
        layers.InputLayer((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)

model.fit(ds_train1, epochs=10, verbose=2)

Epoch 1/10
25/25 - 2s - loss: 94.9544 - accuracy: 0.1200
Epoch 2/10
25/25 - 0s - loss: 5.8504 - accuracy: 0.7000
Epoch 3/10
25/25 - 0s - loss: 0.0799 - accuracy: 0.9800
Epoch 4/10
25/25 - 0s - loss: 0.0167 - accuracy: 0.9800
Epoch 5/10
25/25 - 0s - loss: 2.9078e-05 - accuracy: 1.0000
Epoch 6/10
25/25 - 0s - loss: 8.1100e-06 - accuracy: 1.0000
Epoch 7/10
25/25 - 0s - loss: 7.5165e-06 - accuracy: 1.0000
Epoch 8/10
25/25 - 0s - loss: 6.9254e-06 - accuracy: 1.0000
Epoch 9/10
25/25 - 0s - loss: 6.4081e-06 - accuracy: 1.0000
Epoch 10/10
25/25 - 0s - loss: 5.9171e-06 - accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x7fb93e62bb90>

In [114]:
model.save(dataset_path+'pretrained')


INFO:tensorflow:Assets written to: /content/gdrive/My Drive/Colab Notebooks/CV/Dataset_Transferlearning/pretrained/assets
