In [9]:
# tf tools
import tensorflow as tf

# image processsing
from tensorflow.keras.preprocessing.image import (load_img,
                                                  img_to_array,
                                                  ImageDataGenerator)
# VGG16 model
from tensorflow.keras.applications.vgg16 import (preprocess_input,
                                                 decode_predictions,
                                                 VGG16)

# layers
from tensorflow.keras.layers import (Flatten, 
                                     Dense, 
                                     Dropout, 
                                     BatchNormalization)

from tensorflow.keras import layers

# generic model object
from tensorflow.keras.models import Model

# optimizers
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import SGD

# scikit-learn
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

# for plotting
import numpy as np
import matplotlib.pyplot as plt

# path tools
import os
import pandas as pd
import cv2
import json

In [10]:
def make_dataframe_from_json(json_path):
    # Load JSON data into a list of dictionaries
    data = []
    with open(json_path) as f:
        for line in f:
            data.append(json.loads(line))

    # Convert list of dictionaries to a dataframe
    return pd.DataFrame(data)

test_df = make_dataframe_from_json("../in/archive/test_data.json")
train_df = make_dataframe_from_json("../in/archive/train_data.json")
val_df = make_dataframe_from_json("../in/archive/val_data.json")

In [11]:
test_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

train_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

val_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

In [12]:
# settings
batch_size = 32
target_size = (224, 224)

In [16]:
image_directory = os.path.join("..", "..")

In [19]:
# split the data into three categories
test_images = test_generator.flow_from_dataframe(
    dataframe = test_df,
    directory = image_directory,
    x_col = "image_path",
    y_col = "class_label",
    target_size = target_size,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = False
)

train_images = train_generator.flow_from_dataframe(
    dataframe = train_df,
    directory = image_directory,
    x_col = "image_path",
    y_col = "class_label",
    target_size = target_size,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = True,
    seed = 42,
    subset = "training"
)

val_images = val_generator.flow_from_dataframe(
    dataframe = val_df,
    directory = image_directory,
    x_col = "image_path",
    y_col = "class_label",
    target_size = target_size,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = True,
    seed = 42,
)

Found 0 validated image filenames belonging to 0 classes.




Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.




In [None]:
# data augmentation step
augment = tf.keras.Sequential([
  layers.experimental.preprocessing.Resizing(224,224),
  layers.experimental.preprocessing.Rescaling(1./255),
  layers.experimental.preprocessing.RandomFlip("horizontal"),
  layers.experimental.preprocessing.RandomRotation(0.1),
  layers.experimental.preprocessing.RandomZoom(0.1),
  layers.experimental.preprocessing.RandomContrast(0.1),
])

In [None]:
# load model without classifier layers
model = VGG16(include_top=False, # this removes the final classification network
              pooling='avg', # put an average pooling layer in the top instead
              input_shape=(32, 32, 3)) # changing input shape to the predefined shape of the data

In [21]:
number_of_classes = train_df["class_label"].nunique()

In [None]:
inputs = model.input
x = augment(inputs)

x = Dense(128, activation='relu')(model.output)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)


outputs = Dense(number_of_classes, activation='softmax')(x)

In [None]:
# define new model
model = Model(inputs=model.inputs, 
              outputs=outputs)

# compile
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.9)
sgd = SGD(learning_rate=lr_schedule)

model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# fits the model on batches with real-time data augmentation:
history = model.fit(
  train_images,
  steps_per_epoch = len(train_images),
  validation_data = val_images,
  validation_steps = len(val_images),
  epochs = 100)

ValueError: Asked to retrieve element 0, but the Sequence has length 0

In [None]:
# evaluate
predictions = model.predict(train_images, batch_size=128)
print(classification_report(train_images.argmax(axis=1),
                            predictions.argmax(axis=1),
                            target_names="class_label"))