In [None]:
# pip install opencv-python

In [77]:
import matplotlib
import sklearn
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pathlib
import os
import cv2
import datetime

from os import listdir
from os.path import isfile, isdir, join
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint,CSVLogger
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from keras.models import load_model
from keras.models import Model
from tensorflow.keras.layers import concatenate

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

print("Versions of key libraries")
print("---")
print("tensorflow: ", tf.__version__)
print("numpy:      ", np.__version__)
print("matplotlib: ", matplotlib.__version__)
print("sklearn:    ", sklearn.__version__)
print(tf.config.list_physical_devices())

Versions of key libraries
---
tensorflow:  2.9.0
numpy:       1.22.3
matplotlib:  3.5.2
sklearn:     1.1.1
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [78]:


members_param = [
    {
        "fishType": "general",
        "filePath": "./models/medium_224_20_64_1_2022-09-25_19-33-18.hdf5",
        "img_width": 224,
        "img_height":160
    },
    {
        "fishType": "arawana",
        "filePath": "./models/arowana_softmax_128_50_64_1_2022-09-25_22-34-38.hdf5",
        "img_width": 128,
        "img_height":128
    },
    {
        "fishType": "betta",
        "filePath": "./models/betta_softmax_128_50_64_1_2022-09-26_17-51-39.hdf5",
        "img_width": 128,
        "img_height":128
    },
    {
        "fishType": "goldfish",
        "filePath": "./models/goldfish_softmax_128_50_64_1_2022-09-25_22-37-40.hdf5",
        "img_width": 128,
        "img_height":128
    },
    {
        "fishType": "flowerhorn",
        "filePath": "./models/luohan_softmax_128_50_64_1_2022-09-25_22-42-17.hdf5",
        "img_width": 128,
        "img_height":128
    }
]

modelname   = 'arbitrator'
BATCH_SIZE = 32 # Big enough to measure an F1-score
AUTOTUNE = tf.data.AUTOTUNE # Adapt preprocessing and prefetching dynamically
SHUFFLE_BUFFER_SIZE = 1024 # Shuffle the training data by a chunck of 1024 observations
IMG_WIDTH = 224
IMG_HEIGHT = 160
CHANNELS = 3
SEED = 7
N_LABELS = 4
EPOCHS = 50
OPT_IDX = 1
ACTIVATION = 'softmax'

class_names = ''
modelname = modelname+"_"+ACTIVATION+"_"+str(EPOCHS)+"_"+str(BATCH_SIZE)+"_"+str(OPT_IDX)
optmzs = ['adam', optimizers.RMSprop(learning_rate=0.0001)]
optmz = optmzs[OPT_IDX]

In [79]:
def readImagesFromDir(base_img_path='dataset/'):
    dirs = [d for d in listdir(base_img_path) if isdir(join(base_img_path, d)) and not d.startswith('.') and not d in ['oranda', 'common_goldfish']]

    print(dirs)

    X, y = [], []

    for d in dirs:
        img_path = base_img_path + d + "/"
        files = [f for f in listdir(img_path) if isfile(join(img_path, f))]
        X = X + [os.path.join(img_path, f) for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        y = y + [d for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        #print(d)

    data_dir = Path(base_img_path)
    image_count = len(list(data_dir.glob('*/*.*')))
    
    return X, y


def preprocess_image(filename, label):
    """Function that returns a tuple of normalized image array and labels array.
    Args:
        filename: string representing path to image
        label: 0/1 one-dimensional array of size N_LABELS
    """
    # Read an image from a file
    images = []
    image_string = tf.io.read_file(filename)
    image_decoded = tf.image.decode_jpeg(image_string, channels=CHANNELS)
#     image_resized = tf.image.resize_with_pad(image_decoded, IMG_HEIGHT, IMG_WIDTH, antialias=False)
#     image_normalized = image_resized / 255.0
    for m in members_param:
        image_resized = tf.image.resize_with_pad(image_decoded, m["img_height"], m["img_width"], antialias=False)
        image_normalized = image_resized / 255.0
        images.append(image_normalized)
        
    return images, label


def create_dataset(filenames, labels, is_training=True):
    """Load and parse dataset.
    Args:
        filenames: list of image paths
        labels: numpy array of shape (BATCH_SIZE, N_LABELS)
        is_training: boolean to indicate training mode
    """
    
    # Create a first dataset of file paths and labels
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    # Parse and preprocess observations in parallel
    dataset = dataset.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    
    if is_training == True:
        #dataset = dataset.take(BATCH_SIZE)
        # This is a small dataset, only load it once, and keep it in memory.
        #dataset = dataset.cache()
        #dataset = dataset.repeat()
        # Shuffle the data each buffer size
        dataset = dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
        #dataset = dataset.repeat()
        
    # Batch the data for multiple steps    
    dataset = dataset.batch(BATCH_SIZE)
    # Fetch batches in the background while the model is training.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    
    return dataset
    
def visualize_image(original, augmented):
    org_img = tf.keras.utils.array_to_img(original)
    fig = plt.figure()
    plt.subplot(1,2,1)
    plt.title('Original image')
    plt.imshow(org_img)

    aug_img = tf.keras.utils.array_to_img(augmented)
    plt.subplot(1,2,2)
    plt.title('Augmented image')
    plt.imshow(aug_img)


def predict_new_image(img_file):    
    #img = tf.keras.utils.load_img(
    #    img_file, target_size=(IMG_HEIGHT, IMG_WIDTH), keep_aspect_ratio=True
    #)
    img = tf.keras.utils.load_img(
        img_file, target_size=None, keep_aspect_ratio=True
    )

    img_array = tf.keras.utils.img_to_array(img)
    img_array = tf.keras.preprocessing.image.smart_resize(img_array, size=(IMG_HEIGHT, IMG_WIDTH))
    img_array = tf.expand_dims(img_array, 0) # Create a batch
    image_normalized = img_array / 255.0  # tf.image.per_image_standardization(img_array)

    saved_model = load_model(model_file)

    predictions = model.predict(image_normalized)
    score = tf.nn.softmax(predictions[0])
    #score = tf.sigmoid(predictions[0])
    #score = tf.nn.sigmoid_cross_entropy_with_logits(labels=class_names, logits=predictions[0])
    #score = tf.math.sigmoid(predictions[0])
    #score = tf.tanh(predictions[0])

    #model.evaluate(img_array)
    plt.imshow(img)
    plt.show()

    #print(predictions[0])

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )
    print()

In [80]:
X, Y = readImagesFromDir()

X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=SEED)

lb = LabelBinarizer()
lb.fit(y_train)
class_names = lb.classes_
# Loop over all labels and show them    
N_LABELS = len(class_names)
for (i, label) in enumerate(class_names):
    print("{}. {}".format(i, label))

# transform the targets of the training and test sets
y_train_bin = lb.transform(y_train)
y_val_bin = lb.transform(y_val)

print(y_train_bin.shape)
print(y_val_bin.shape)

['arowana', 'betta', 'goldfish', 'luohan']
0. arowana
1. betta
2. goldfish
3. luohan
(3297, 4)
(825, 4)


In [81]:
# Print examples of fish and their binary targets
for i in range(10):
    print(X_train[len(X_train)-1 - i], y_train_bin[len(y_train_bin)-1 - i])

dataset/betta/betta0194.jpg [0 1 0 0]
dataset/arowana/Arrowana10533.jpg [1 0 0 0]
dataset/goldfish/Goldfish1515.jpg [0 0 1 0]
dataset/arowana/Arrowana10911.jpg [1 0 0 0]
dataset/betta/betta0357.jpg [0 1 0 0]
dataset/luohan/FHCichlid100114.jpg [0 0 0 1]
dataset/goldfish/Goldfish1548.jpg [0 0 1 0]
dataset/luohan/FHCichlid100525.jpg [0 0 0 1]
dataset/goldfish/Goldfish1743.jpg [0 0 1 0]
dataset/luohan/FHCichlid100096.jpg [0 0 0 1]


In [82]:
# train_ds = []
# val_ds = []

# for m in memebers_param:
#     IMG_HEIGHT = m["img_height"]
#     IMG_WIDTH = m["img_width"]
#     train_ds.append(create_dataset(X_train, y_train_bin))
#     val_ds.append(create_dataset(X_val, y_val_bin, is_training=False))

train_ds = create_dataset(X_train, y_train_bin)
val_ds = create_dataset(X_val, y_val_bin, is_training=False)

# for images, labels in train_ds.take(1):
#     print("Shape of features array:", images.numpy().shape)
#     print("Shape of labels array:", labels.numpy().shape)
#     #plt.imshow(f.numpy().astype("uint8"))
#     for i in range(5):
#         ax = plt.subplot(2, 3, i + 1)
#         img = tf.keras.utils.array_to_img(images[i])
#         plt.imshow(img)
#         plt.title(class_names[np.argmax(labels[i])])
#         plt.axis("off")

ValueError: Value [<tf.Tensor 'truediv:0' shape=(160, 224, 3) dtype=float32>, <tf.Tensor 'truediv_1:0' shape=(128, 128, 3) dtype=float32>, <tf.Tensor 'truediv_2:0' shape=(128, 128, 3) dtype=float32>, <tf.Tensor 'truediv_3:0' shape=(128, 128, 3) dtype=float32>, <tf.Tensor 'truediv_4:0' shape=(128, 128, 3) dtype=float32>] is not convertible to a tensor with dtype <dtype: 'float32'> and shape (5, None, None, 3).

In [74]:
def add_prefix(model, prefix: str, custom_objects=None):
    config = model.get_config()
    new_to_old = {}
    for layer in config['layers']:
#         print(layer)
        new_name = prefix + layer['config']['name']
        new_to_old[new_name] = layer['config']['name']
#         layer['name'] = new_name
        layer['config']['name'] = new_name

    new_model = tf.keras.Sequential().from_config(config, custom_objects)
    
    for layer in new_model.layers:
        layer.set_weights(model.get_layer(new_to_old[layer.name]).get_weights())
    
    return new_model

# load models from file
def load_all_models(memebers_param):
    all_models = list()
    prefix = "ensemble_"
    c = 0
    for m in memebers_param:
        # define filename for this ensemble
        filename = m["filePath"]
        # load model from file
        model = load_model(filename)
        # add to list of members
        all_models.append(add_prefix(model,prefix+str(c)))
        c+=1
        print('>loaded %s' % filename)
    return all_models
 
# define stacked model from multiple member input models
def define_stacked_model(members):
    # update all layers in all models to not be trainable
    for i in range(len(members)):
        model = members[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
    # define multi-headed input
    ensemble_visible = [model.input for model in members]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)
    hidden = Dense(12, activation='relu')(merge)
    output = Dense(N_LABELS, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)

    # compile
    model.compile(loss='categorical_crossentropy', optimizer=optmz, metrics=['accuracy'])
    return model

model = define_stacked_model(load_all_models(memebers_param))
model.summary()

>loaded ./models/medium_224_20_64_1_2022-09-25_19-33-18.hdf5
>loaded ./models/arowana_softmax_128_50_64_1_2022-09-25_22-34-38.hdf5
>loaded ./models/betta_softmax_128_50_64_1_2022-09-26_17-51-39.hdf5
>loaded ./models/goldfish_softmax_128_50_64_1_2022-09-25_22-37-40.hdf5
>loaded ./models/luohan_softmax_128_50_64_1_2022-09-25_22-42-17.hdf5
Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 ensemble_0conv2d_495_input (In  [(None, 160, 224, 3  0          []                               
 putLayer)                      )]                                                                
                                                                                                  
 ensemble_0conv2d_495 (Conv2D)  (None, 160, 224, 32  2432        ['ensemble_0conv2d_495_input[0][0
                                )                 

 oling2D)                                                                                         
                                                                                                  
 ensemble_4max_pooling2d (MaxPo  (None, 64, 64, 32)  0           ['ensemble_4conv2d[0][0]']       
 oling2D)                                                                                         
                                                                                                  
 ensemble_0dense_573 (Dense)    (None, 512)          393728      ['ensemble_0dense_572[0][0]']    
                                                                                                  
 ensemble_1conv2d_1 (Conv2D)    (None, 64, 64, 16)   4624        ['ensemble_1max_pooling2d[0][0]']
                                                                                                  
 ensemble_2conv2d_1 (Conv2D)    (None, 64, 64, 16)   4624        ['ensemble_2max_pooling2d[0][0]']
          

Total params: 11,182,212
Trainable params: 208
Non-trainable params: 11,182,004
__________________________________________________________________________________________________


In [75]:
                                                                                # Step 1
modelname       = modelname+"_"+str(datetime.datetime.now())[:-7].replace(' ','_').replace(":",'-')
folderpath      = 'models/'
model_json      = folderpath + modelname + ".json"
with open(model_json, "w") as json_file:
    json_file.write(model.to_json())
model_file      = folderpath + modelname + ".hdf5"
checkpoint      = ModelCheckpoint(model_file, 
                                  monitor='val_accuracy', 
                                  verbose=0, 
                                  save_best_only=True, 
                                  mode='max')

csv_logger      = CSVLogger(folderpath+modelname +'.csv')                       # Step 2
callbacks_list  = [checkpoint,csv_logger]                                       # Step 3

print("Path to model:", model_file)
print("Path to log:  ", folderpath+modelname+'.csv')

Path to model: models/arbitrator_softmax_50_32_1_2022-10-07_22-22-23.hdf5
Path to log:   models/arbitrator_softmax_50_32_1_2022-10-07_22-22-23.csv


In [76]:
import time as time

start = time.time()
history = model.fit(train_ds,                            # Training data and label
          validation_data=val_ds,   # Validation data and label
          epochs=EPOCHS,                       # The amount of epochs to be trained
          batch_size=BATCH_SIZE,                   
          shuffle=True,                     # To shuffle the training data
          callbacks=callbacks_list)         # Callbacks to execute the checkpoints

end = time.time()
duration = round(((end - start)/60), 2)
print("duration = ", duration, " minutes")

Epoch 1/50


ValueError: in user code:

    File "C:\Users\song\anaconda3\envs\tfgpu\lib\site-packages\keras\engine\training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\song\anaconda3\envs\tfgpu\lib\site-packages\keras\engine\training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\song\anaconda3\envs\tfgpu\lib\site-packages\keras\engine\training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\song\anaconda3\envs\tfgpu\lib\site-packages\keras\engine\training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\song\anaconda3\envs\tfgpu\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\song\anaconda3\envs\tfgpu\lib\site-packages\keras\engine\input_spec.py", line 200, in assert_input_compatibility
        raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),'

    ValueError: Layer "model_4" expects 5 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 5, 128, 128, 3) dtype=float32>]


In [10]:
plotpath  = folderpath+modelname+'_plot.png'
plot_model(model, 
           to_file=plotpath, 
           show_shapes=True, 
           show_layer_names=False,
           rankdir='TB')
print("Path to plot:", plotpath)

Path to plot: models/arbitrator_softmax_50_32_1_2022-10-06_20-44-16_plot.png


In [11]:
                                                                                # Step 1
modelGo = load_model(model_file)

predicts    = modelGo.predict(val_ds)                                            # Step 2
print("Prediction completes.")

OSError: No file or directory found at models/arbitrator_softmax_50_32_1_2022-10-06_20-44-16.hdf5

In [12]:
                                                                                # Step 1
                                                                                # Step 2
predout     = np.argmax(predicts,axis=1)
testout     = np.argmax(y_val_bin,axis=1)

testScores  = metrics.accuracy_score(testout,predout)                           # Step 3

                                                                                # Step 4
print("Best accuracy (on testing dataset): %.2f%%" % (testScores*100))
print(metrics.classification_report(testout,
                                    predout,
                                    target_names=class_names,
                                    digits=4))

report = metrics.classification_report(testout,
                                    predout,
                                    target_names=class_names,
                                    digits=4,
                                      output_dict=True)

df = pd.DataFrame(report).transpose()
df.to_csv(folderpath+modelname+'_report.csv')

NameError: name 'predicts' is not defined