In [1]:
# tf tools
import tensorflow as tf

# image processsing
from tensorflow.keras.preprocessing.image import (load_img,
                                                  img_to_array,
                                                  ImageDataGenerator)
# VGG16 model
from tensorflow.keras.applications.vgg16 import (preprocess_input,
                                                 decode_predictions,
                                                 VGG16)

# layers
from tensorflow.keras.layers import (Flatten, 
                                     Dense, 
                                     Dropout, 
                                     BatchNormalization)

from tensorflow.keras import layers

# generic model object
from tensorflow.keras.models import Model

# optimizers
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import SGD

# scikit-learn
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

# for plotting
import numpy as np
import matplotlib.pyplot as plt

# path tools
import os
import pandas as pd
import json

2023-04-24 15:10:14.252024: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
# loading json metadata
test_df = pd.read_json(os.path.join("..", "images", "metadata", "test_data.json"), lines=True)
train_df = pd.read_json(os.path.join("..", "images", "metadata", "train_data.json"), lines=True)
val_df = pd.read_json(os.path.join("..", "images", "metadata", "val_data.json"), lines=True)

In [None]:
# creating random samle
#test_df_sample = test_df.sample(n=3000)
#train_df_sample = train_df.sample(n=10000)
#val_df_sample = test_df.sample(n=2000)

In [12]:
# creating random samle
test_df_sample = test_df.sample(n=75)
train_df_sample = train_df.sample(n=250)
val_df_sample = test_df.sample(n=50)

In [13]:
# data generating settings
train_datagen = ImageDataGenerator(horizontal_flip=True,
                                    rotation_range=20,
                                    rescale=1/255
)

test_datagen = ImageDataGenerator(
                                rescale=1./255.
)

In [14]:
# setting image directory
image_directory = os.path.join("..")

In [15]:
# settings for sizes
batch_size = 32
target_size = (224, 224)

In [18]:
# generating images
test_images = test_datagen.flow_from_dataframe(
    dataframe = test_df_sample,
    directory = image_directory,
    x_col = "image_path",
    y_col = "class_label",
    target_size = target_size,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = False,
)

train_images = train_datagen.flow_from_dataframe(
    dataframe = train_df_sample,
    directory = image_directory,
    x_col = "image_path",
    y_col = "class_label",
    target_size = target_size,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = True,
    seed = 42,
    subset = "training"
)

val_images = train_datagen.flow_from_dataframe(
    dataframe = val_df_sample,
    directory = image_directory,
    x_col = "image_path",
    y_col = "class_label",
    target_size = target_size,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = True,
    seed = 42,
)

Found 75 validated image filenames belonging to 15 classes.
Found 250 validated image filenames belonging to 15 classes.
Found 50 validated image filenames belonging to 15 classes.


In [19]:
# load model without classifier layers
model = VGG16(include_top=False, # this removes the final classification network
              pooling='avg', # put an average pooling layer in the top instead
              input_shape=(224, 224, 3)) # changing input shape to the predefined shape of the data

In [20]:
# mark loaded layers as not trainable
for layer in model.layers:
    layer.trainable = False
    
# add new classifier layers
flat1 = Flatten()(model.layers[-1].output)
bn = BatchNormalization()(flat1)
class1 = Dense(256, 
               activation='relu')(bn)
class2 = Dense(128, 
               activation='relu')(class1)
output = Dense(15, 
               activation='softmax')(class2)

# define new model
model = Model(inputs=model.inputs, 
              outputs=output)

In [21]:
# compile
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.9)
sgd = SGD(learning_rate=lr_schedule)

model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [22]:
# fits the model on batches with real-time data augmentation:
history = model.fit(
    train_images,
    steps_per_epoch = len(train_images),
    validation_data = val_images,
    validation_steps = len(val_images),
    epochs = 10)

2023-04-24 15:18:41.955227: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 1/10


In [2]:
# plotting helper function
def plot_history(history, epochs):
    plt.style.use("seaborn-colorblind")

    plt.figure(figsize=(12,6))
    plt.subplot(1,2,1)
    plt.plot(np.arange(0, epochs), history.history["loss"], label="train_loss")
    plt.plot(np.arange(0, epochs), history.history["val_loss"], label="val_loss", linestyle=":")
    plt.title("Loss curve")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.tight_layout()
    plt.legend()

    plt.subplot(1,2,2)
    plt.plot(np.arange(0, epochs), history.history["accuracy"], label="train_acc")
    plt.plot(np.arange(0, epochs), history.history["val_accuracy"], label="val_acc", linestyle=":")
    plt.title("Accuracy curve")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.tight_layout()
    plt.legend()
    plt.show()

In [3]:
plot_history(history, 10)

NameError: name 'history' is not defined

In [None]:
# from nbs3!!!!!
    # save the plot
    output_path = os.path.join("..", "out", filename)
    plt.savefig(output_path)
    # Show plot
    plt.show()

In [15]:
# classification report
predictions = model.predict(test_images, batch_size=128)
report = classification_report(test_images.argmax(axis=1),
                            predictions.argmax(axis=1),
                            target_names="class_label")

2023-04-24 12:03:43.120732: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




AttributeError: 'DataFrameIterator' object has no attribute 'argmax'

In [None]:
# defining outpath
folderpath = os.path.join("out")
# defining filename
filename = "classification_report.txt"
# writing and saving classification report
filepath = os.path.join(folderpath, filename)
with open(filepath, "w") as f:
    f.write(report)