In [1]:
# pip install opencv-python

In [14]:
import matplotlib
import sklearn
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pathlib
import os
import cv2
import datetime

from os import listdir
from os.path import isfile, isdir, join
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint,CSVLogger
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from keras.models import load_model
from keras.models import Model
from tensorflow.keras.layers import concatenate

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

# Report the library versions and the compute devices TensorFlow can see,
# so a saved copy of the notebook records the environment it ran in.
print("Versions of key libraries")
print("---")
for caption, library in (
    ("tensorflow: ", tf),
    ("numpy:      ", np),
    ("matplotlib: ", matplotlib),
    ("sklearn:    ", sklearn),
):
    print(caption, library.__version__)
print(tf.config.list_physical_devices())

Versions of key libraries
---
tensorflow:  2.9.0
numpy:       1.22.3
matplotlib:  3.5.2
sklearn:     1.1.1
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [15]:
# One entry per ensemble member. The position in this list is significant:
# member i receives the image stored under the key "input_<i>" and its layers
# are renamed with the index i when the stacked model is assembled.
#   model_name  - human-readable identifier of the member
#   file_path   - saved Keras model (.hdf5) to load
#   class_names - output classes of that member, in output order
#   img_width / img_height - input resolution the member was trained with
members_param = [
    {
        # Four-way classifier over all species.
        "model_name": "general",
        "file_path": "./models/medium_224_20_64_1_2022-09-25_19-33-18.hdf5",
        "class_names": ['arowana', 'betta', 'goldfish', 'flowerhorn'],
        "img_width": 224,
        "img_height": 160
    },
    {
        # Spelled "arowana" to agree with this entry's file_path and class_names.
        "model_name": "arowana",
        "file_path": "./models/arowana_softmax_128_50_64_1_2022-09-25_22-34-38.hdf5",
        "class_names": ['not arowana', 'arowana'],
        "img_width": 128,
        "img_height": 128
    },
    {
        "model_name": "betta",
        "file_path": "./models/betta_softmax_128_50_64_1_2022-09-26_17-51-39.hdf5",
        "class_names": ['not betta', 'betta'],
        "img_width": 128,
        "img_height": 128
    },
    {
        "model_name": "goldfish",
        "file_path": "./models/goldfish_softmax_128_50_64_1_2022-09-25_22-37-40.hdf5",
        "class_names": ['not goldfish', 'goldfish'],
        "img_width": 128,
        "img_height": 128
    },
    {
        # The checkpoint file for this member is named "luohan".
        "model_name": "flowerhorn",
        "file_path": "./models/luohan_softmax_128_50_64_1_2022-09-25_22-42-17.hdf5",
        "class_names": ['not flowerhorn', 'flowerhorn'],
        "img_width": 128,
        "img_height": 128
    }
]

# ---- Run configuration ----
modelname = 'arbitrator'             # base name; hyper-parameters are appended below

# Input pipeline
BATCH_SIZE = 32                      # samples per batch produced by the datasets
AUTOTUNE = tf.data.AUTOTUNE          # let tf.data pick parallelism / prefetch depth
SHUFFLE_BUFFER_SIZE = 1024           # size of the shuffle buffer used for training data

# Image geometry of the general model (each member's own size lives in members_param)
IMG_WIDTH = 224
IMG_HEIGHT = 160
CHANNELS = 3

# Training
SEED = 7                             # random_state of the train/validation split
N_LABELS = 4                         # provisional; recomputed from the fitted label encoder
EPOCHS = 5
OPT_IDX = 1                          # index into `optmzs`
ACTIVATION = 'softmax'               # activation of the stacked model's output layer

class_names = ''                     # placeholder; replaced by the label encoder's classes

# Run name encodes activation, epochs, batch size and optimizer index.
modelname = "_".join([modelname, ACTIVATION, str(EPOCHS), str(BATCH_SIZE), str(OPT_IDX)])

optmzs = ['adam', optimizers.RMSprop(learning_rate=0.0001)]
optmz = optmzs[OPT_IDX]

In [16]:
def readImagesFromDir(base_img_path='dataset/', exclude_dirs=('oranda', 'common_goldfish')):
    """Collect image paths and class labels from a folder-per-class directory.

    Every visible sub-directory of ``base_img_path`` (names not starting with
    '.' and not listed in ``exclude_dirs``) is treated as one class; its name
    is used as the label of each image file found directly inside it.

    Directory and file listings are sorted so the returned order does not
    depend on the file system, which keeps a seeded train/validation split
    reproducible across machines.

    Args:
        base_img_path: root directory of the dataset, with or without a
            trailing path separator.
        exclude_dirs: sub-directory names to skip.

    Returns:
        Tuple ``(X, y)`` of equally long lists: image file paths and the
        matching class-folder names.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    dirs = sorted(
        d for d in listdir(base_img_path)
        if isdir(join(base_img_path, d))
        and not d.startswith('.')
        and d not in exclude_dirs
    )

    print(dirs)

    X, y = [], []
    for d in dirs:
        # join() instead of string concatenation, so a base path without a
        # trailing separator still resolves to the right folder.
        class_dir = join(base_img_path, d)
        for f in sorted(listdir(class_dir)):
            path = join(class_dir, f)
            if isfile(path) and f.lower().endswith(image_suffixes):
                X.append(path)
                y.append(d)

    return X, y


def preprocess_image(filename, label):
    """Load one image and prepare a separately sized copy for every ensemble member.

    Args:
        filename: path of the image file, read with ``tf.io.read_file``.
        label: target for this image; returned unchanged.

    Returns:
        Tuple ``(images, label)``. ``images`` is a dict with one entry per
        element of ``members_param``: key ``"input_<i>"`` holds the image
        resized with padding to member i's ``img_height`` x ``img_width`` and
        divided by 255.
    """
    # NOTE(review): readImagesFromDir also collects .png files; confirm that
    # decode_jpeg handles them in the installed TensorFlow version.
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)

    member_inputs = {}
    for idx, member in enumerate(members_param):
        # Pad rather than stretch so the aspect ratio of the picture is kept.
        padded = tf.image.resize_with_pad(
            decoded, member["img_height"], member["img_width"], antialias=False
        )
        member_inputs["input_" + str(idx)] = padded / 255.0

    return member_inputs, label


def create_dataset(filenames, labels, is_training=True):
    """Build a batched, prefetched ``tf.data`` pipeline of preprocessed images.

    Args:
        filenames: list of image paths.
        labels: array of targets, one row per entry of ``filenames``.
        is_training: when truthy the samples are shuffled; otherwise the
            order of ``filenames`` is preserved.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of up to
        ``BATCH_SIZE`` samples, where ``images`` is the dict produced by
        ``preprocess_image``.
    """
    # Start from (path, label) pairs.
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

    if is_training:
        # Shuffle BEFORE decoding: the buffer then holds short path strings
        # instead of SHUFFLE_BUFFER_SIZE decoded multi-resolution image dicts,
        # which costs far less memory for the same shuffling behaviour.
        dataset = dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)

    # Decode and resize in parallel.
    dataset = dataset.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    # Group samples into batches.
    dataset = dataset.batch(BATCH_SIZE)
    # Prepare upcoming batches in the background while the model consumes the current one.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset
    
def visualize_image(original, augmented):
    """Draw two image arrays side by side in a new matplotlib figure.

    Args:
        original: image array shown in the left panel.
        augmented: image array shown in the right panel.
    """
    def _show_panel(slot, caption, picture):
        # Fill one cell of the 1x2 grid.
        plt.subplot(1, 2, slot)
        plt.title(caption)
        plt.imshow(picture)

    left_panel = tf.keras.utils.array_to_img(original)
    plt.figure()
    _show_panel(1, 'Original image', left_panel)

    right_panel = tf.keras.utils.array_to_img(augmented)
    _show_panel(2, 'Augmented image', right_panel)


def predict_new_image(img_file):
    """Classify a single image file with the saved stacked model and report the result.

    The image is prepared with ``preprocess_image`` so that it goes through
    exactly the same decoding, padding and scaling as the training data and
    yields one input per ensemble member. The prediction is made with the
    checkpoint stored at the module-level ``model_file``. The image is then
    displayed and the most likely entry of ``class_names`` is printed together
    with its score.

    Args:
        img_file: path of the image to classify.
    """
    # Same preprocessing as training; the label slot is not needed here.
    member_inputs, _ = preprocess_image(img_file, None)
    # Add a leading batch dimension to each member's input.
    batch = {name: tf.expand_dims(image, 0) for name, image in member_inputs.items()}

    # Predict with the checkpoint that was loaded, not with whatever model
    # happens to be bound to the global name `model`.
    saved_model = load_model(model_file)
    predictions = saved_model.predict(batch)

    # The output layer already applies ACTIVATION ('softmax'), so the values
    # are probabilities; applying softmax a second time would flatten them
    # and understate the reported confidence.
    score = predictions[0]

    img = tf.keras.utils.load_img(img_file)
    plt.imshow(img)
    plt.show()

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )
    print()

In [17]:
# Gather every labelled image path, then hold out 20% of them for validation.
X, Y = readImagesFromDir()
X_train, X_val, y_train, y_val = train_test_split(
    X, Y,
    test_size=0.2,
    shuffle=True,
    random_state=SEED,
)

# Fit the label encoder on the training labels only.
lb = LabelBinarizer().fit(y_train)
class_names = lb.classes_
N_LABELS = len(class_names)

# List each class next to the index it was assigned.
for i, label in enumerate(class_names):
    print("{}. {}".format(i, label))

# Encode the targets of both splits with the fitted encoder.
y_train_bin, y_val_bin = (lb.transform(split) for split in (y_train, y_val))

print(y_train_bin.shape)
print(y_val_bin.shape)

['arowana', 'betta', 'goldfish', 'luohan']
0. arowana
1. betta
2. goldfish
3. luohan
(5872, 4)
(1469, 4)


In [18]:
# Show the last ten training samples, newest first, with their encoded targets.
for offset in range(10):
    path_idx = len(X_train) - 1 - offset
    target_idx = len(y_train_bin) - 1 - offset
    print(X_train[path_idx], y_train_bin[target_idx])

dataset/goldfish/Goldfish2293.jpg [0 0 1 0]
dataset/arowana/Arowana_101220.jpg [1 0 0 0]
dataset/arowana/Arowana_100537.jpg [1 0 0 0]
dataset/betta/betta0551.jpg [0 1 0 0]
dataset/luohan/Flowerhorn10533.jpg [0 0 0 1]
dataset/goldfish/Goldfish2329.jpg [0 0 1 0]
dataset/arowana/Arowana_100919.jpg [1 0 0 0]
dataset/luohan/Flowerhorn10569.jpg [0 0 0 1]
dataset/arowana/Arowana_101372.jpg [1 0 0 0]
dataset/goldfish/Goldfish1244.jpg [0 0 1 0]


In [19]:
# Build the input pipelines. Each batch carries a dict of per-member images
# (keys "input_0", "input_1", ...) plus the encoded labels; only the training
# pipeline is shuffled.
train_ds = create_dataset(X_train, y_train_bin, is_training=True)
val_ds = create_dataset(X_val, y_val_bin, is_training=False)

In [None]:
def add_prefix(model, prefix: str, i: str, custom_objects=None):
    """Return a copy of ``model`` whose layers carry unique, member-specific names.

    Several loaded members share layer names, which cannot coexist in one
    combined model. This rebuilds the model from its config with every layer
    renamed to ``prefix + i + <old name>``; the input layer is instead named
    ``"input_" + i`` so it matches the keys produced by the data pipeline.
    Weights are then copied layer by layer from the original model.

    Args:
        model: the Keras model to copy.
        prefix: text prepended to every non-input layer name.
        i: member index as a string, used in all new names.
        custom_objects: optional mapping forwarded to ``from_config``.

    Returns:
        The renamed model, holding the same weights as ``model``.
    """
    config = model.get_config()
    new_to_old = {}
    for layer in config['layers']:
        old_name = layer['config']['name']
        if layer['class_name'] == 'InputLayer':
            new_name = "input_" + i
        else:
            new_name = prefix + i + old_name
        new_to_old[new_name] = old_name
        layer['config']['name'] = new_name

    # from_config is a classmethod: call it on the class instead of building a
    # throwaway empty model first. The model's type, not its (user-settable)
    # name, decides which class rebuilds it.
    if isinstance(model, tf.keras.Sequential):
        new_model = tf.keras.Sequential.from_config(config, custom_objects=custom_objects)
    else:
        # NOTE(review): only layer['config']['name'] is rewritten above. If a
        # functional config refers to layers by name elsewhere (e.g. in its
        # connectivity data), those references keep the old names - confirm
        # before relying on this branch.
        new_model = tf.keras.Model.from_config(config, custom_objects=custom_objects)

    # Rebuilding from config creates fresh weights; restore the trained ones.
    for layer in new_model.layers:
        layer.set_weights(model.get_layer(new_to_old[layer.name]).get_weights())

    return new_model

# Load every ensemble member from disk
def load_all_models(members_param):
    """Load each member model and rename its layers so all members can be combined.

    Args:
        members_param: list of member descriptions; the ``"file_path"`` of
            each entry is the model file to load.

    Returns:
        List of renamed models, in the same order as ``members_param``.
    """
    prefix = "ensemble_"
    all_models = []

    for position, member in enumerate(members_param):
        filename = member["file_path"]
        loaded = load_model(filename)
        # The list position becomes part of every layer name of this member.
        renamed = add_prefix(loaded, prefix, str(position))
        all_models.append(renamed)
        print('>loaded %s' % filename)

    return all_models
 
# Combine the member models into a single trainable "arbitrator" network
def define_stacked_model(members):
    """Stack the member models under a small dense head and compile the result.

    All layers of the members are frozen. Their outputs are concatenated and
    passed through a 12-unit ReLU layer followed by an ``N_LABELS``-unit
    output layer using ``ACTIVATION``.

    Args:
        members: list of Keras models to combine.

    Returns:
        The compiled model. It takes one input per member and is compiled
        with categorical cross-entropy, the optimizer ``optmz`` and the
        accuracy metric.
    """
    # Freeze the members so only the new head is trained.
    for member in members:
        for layer in member.layers:
            layer.trainable = False

    # One input head and one output per member.
    ensemble_visible = [member.input for member in members]
    ensemble_outputs = [member.output for member in members]

    merged = concatenate(ensemble_outputs)
    hidden = Dense(12, activation='relu')(merged)
    output = Dense(N_LABELS, activation=ACTIVATION)(hidden)
    stacked = Model(inputs=ensemble_visible, outputs=output)

    stacked.compile(loss='categorical_crossentropy', optimizer=optmz, metrics=['accuracy'])
    return stacked
# Load the member networks listed in members_param, wire them into the
# stacked model, and print the resulting architecture.
member_models = load_all_models(members_param)
model = define_stacked_model(member_models)
model.summary()

In [None]:
                                                                                # Step 1
# Tag this run with a timestamp (YYYY-MM-DD_HH-MM-SS) so its artefacts do not
# overwrite those of earlier runs. strftime is used instead of slicing
# str(datetime.now()): str() omits the microsecond field when it is exactly
# zero, in which case cutting the last 7 characters would remove part of the
# time itself.
# NOTE: this appends to `modelname`; re-running only this cell adds a second timestamp.
modelname       = modelname + "_" + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
folderpath      = 'models/'
# Make sure the output folder exists before anything is written into it.
os.makedirs(folderpath, exist_ok=True)

# Save the architecture as JSON.
model_json      = folderpath + modelname + ".json"
with open(model_json, "w") as json_file:
    json_file.write(model.to_json())

# Keep only the weights of the epoch with the highest validation accuracy.
model_file      = folderpath + modelname + ".hdf5"
checkpoint      = ModelCheckpoint(model_file,
                                  monitor='val_accuracy',
                                  verbose=0,
                                  save_best_only=True,
                                  mode='max')

# Log the per-epoch metrics to CSV next to the checkpoint.
csv_logger      = CSVLogger(folderpath+modelname +'.csv')
callbacks_list  = [checkpoint,csv_logger]

print("Path to model:", model_file)
print("Path to log:  ", folderpath+modelname+'.csv')

In [None]:
import time

# Fit the stacked model and measure how long training takes.
# `batch_size` and `shuffle` are intentionally not passed: train_ds is a
# tf.data.Dataset that is already batched (and shuffled) by create_dataset,
# and Keras documents both arguments as not applicable to dataset input.
start = time.time()
history = model.fit(
    train_ds,                      # batches of (member inputs, labels)
    validation_data=val_ds,        # evaluated at the end of every epoch
    epochs=EPOCHS,
    callbacks=callbacks_list,      # checkpointing and CSV logging
)
end = time.time()

duration = round(((end - start)/60), 2)
print("duration = ", duration, " minutes")

In [None]:
# Render the model graph to an image stored with the other artefacts of this run.
plotpath = folderpath + modelname + '_plot.png'
plot_model(
    model,
    to_file=plotpath,
    show_shapes=True,
    show_layer_names=False,
    rankdir='TB',
)
print("Path to plot:", plotpath)

In [None]:
                                                                                # Step 1
# Reload the checkpoint written during training (the epoch with the best
# val_accuracy) instead of using the in-memory model from the final epoch.
modelGo = load_model(model_file)

predicts    = modelGo.predict(val_ds)                                            # predicted class scores for the validation set
print("Prediction completes.")

In [None]:
                                                                                # Step 1
                                                                                # Step 2
# Reduce the predicted scores and the encoded targets to class indices.
predout = np.argmax(predicts, axis=1)
testout = np.argmax(y_val_bin, axis=1)

# Overall accuracy on the held-out split.
testScores = metrics.accuracy_score(testout, predout)
print("Best accuracy (on testing dataset): %.2f%%" % (testScores*100))

# Per-class precision / recall / F1: once as text for display, once as a dict for export.
report_options = dict(target_names=class_names, digits=4)
print(metrics.classification_report(testout, predout, **report_options))

report = metrics.classification_report(testout, predout, output_dict=True, **report_options)

# Store the report alongside the other artefacts of this run.
df = pd.DataFrame(report).transpose()
df.to_csv(folderpath+modelname+'_report.csv')


In [None]:
# One row per validation image: its path, true label, and the true / predicted class index.
resdf = pd.DataFrame({
    "file": X_val,
    "fish": y_val,
    "testout": testout,
    "predout": predout,
})
print(resdf)

In [None]:
# Show every validation image whose predicted class differs from its true class.
resdf.loc[resdf["testout"] != resdf["predout"]]