In [2]:
# Preconditions

import os
import json
from PIL import Image
import numpy as np
import keras.api._v2.keras as keras
from keras.applications import VGG16, ResNet50
from keras.callbacks import EarlyStopping, TensorBoard
from keras.callbacks import ReduceLROnPlateau
from keras import regularizers
import tensorflow as tf

import random
import helper as hp

# --- Model persistence -------------------------------------------------------
# File name (including the ".keras" extension) and directory of the stored model.
_TMP_ROOT = os.path.join("..", "tmp")
MODEL_NAME = "v1_no_base_50000.keras"
MODEL_PATH = os.path.join(_TMP_ROOT, "models")

# --- Training data -----------------------------------------------------------
# Root directory that contains one sub-folder per data bundle.
RESSOURCES_PATH = os.path.join(_TMP_ROOT, "train", "ressources", "bundle")

# Bundles that are read by the data loader. Bundles that are commented out are
# currently excluded.
TRAIN_DATA_FOLDER = [
    "1f7534fa-6ee2-4e6a-a921-2a635a5fe917",
    "9ef5620b-c769-49ba-b083-7cbc25fe7ec6",
    "56dd460c-200d-4248-b6d5-b61bd0681fd7",
    "81370b3a-0747-44f7-bdd9-08279027b99a",
    "470833f8-ddaf-4aca-8a65-e30f80504a8a",
    # "32831052-7914-4220-bde4-c970c9c6c404",
    # "82612320-658a-495f-9d10-e54b35471628",
    # "a04111fd-ac8f-44b4-b1af-fe8f67252098",
    # "b8768080-7bdd-43c1-ab11-b940b74b07ef",
    # "efc4db14-5e6b-4c14-8543-997997d55476"
]

# --- Image properties --------------------------------------------------------
# Size every image is resized to, and the number of pixels that are cropped
# away afterwards (the model inputs are HEIGHT - CROP_HEIGHT by WIDTH - CROP_WIDTH).
IMAGE_WIDTH_PX = 160
IMAGE_HEIGHT_PX = 120
CROP_WIDTH_PX = 20
CROP_HEIGHT_PX = 5

# --- Problem dimensions ------------------------------------------------------
NUM_CHANNELS = 3   # colour channels per image
NUM_CLASSES = 4    # number of entries in the colour mapping
NUM_POSITIONS = 8  # positions predicted per sample

# Colour name -> class index. The empty string is the class of a position
# that has no colour assigned.
color_mapping = {'red': 0, 'yellow': 1, 'blue': 2, '': 3}

# Class index -> colour name (exact inverse of ``color_mapping``).
label_mapping = {index: colour for colour, index in color_mapping.items()}

# Divisor used to scale pixel values in ``normalize_images``.
NORMALIZE_VALUE = 255


def map_labels_to_nummeric(label):
    """Translate one label record into a list of class indices.

    Args:
        label: Mapping whose values are colour names known to
            ``color_mapping``. Only the values are used; they are visited in
            the mapping's iteration order.

    Returns:
        A list with one class index per value of ``label``.

    Raises:
        KeyError: If a value is not a key of ``color_mapping``.
    """
    return [color_mapping[colour] for colour in label.values()]


def normalize_images(images):
    """Scale pixel values by dividing them by ``NORMALIZE_VALUE``.

    With 8-bit input (0..255) the result lies between 0 and 1.

    Args:
        images: Array (or scalar) of pixel values.

    Returns:
        ``images`` divided element-wise by ``NORMALIZE_VALUE``.
    """
    scaled = images / NORMALIZE_VALUE
    return scaled


# Data loading
IN_DEBUG_MODE = False  # set to True to print every file that is read
IMAGE_FOLDER = "Images"
LABELS_FOLDER = "Labels"
JSON_NAME = "scene_results.json"


def _preprocess_image(image, augment=True):
    """Run the per-image pipeline on one opened Pillow image.

    Steps: resize to IMAGE_WIDTH_PX x IMAGE_HEIGHT_PX, convert to a NumPy
    array, reorder channels to BGR, apply ``hp.Video.translate_image``, crop,
    optionally apply ``hp.Augmentation.black_spots`` and finally
    ``hp.Preprocess.start``.

    Args:
        image: An opened ``PIL.Image.Image``.
        augment: When False, the ``black_spots`` augmentation is skipped.

    Returns:
        Whatever ``hp.Preprocess.start`` returns for the cropped image
        (presumably a NumPy array -- defined in the helper module, confirm).
    """
    resized = image.resize((IMAGE_WIDTH_PX, IMAGE_HEIGHT_PX))

    # Channel order of Pillow is different than OpenCV
    pixels = hp.Preprocess.convert_to_BGR(np.array(resized))

    pixels = hp.Video.translate_image(pixels)

    # Crop to the model input size: drop CROP_HEIGHT_PX rows at the bottom and
    # CROP_WIDTH_PX columns split evenly between left and right. With the
    # configured 120x160 / 5 / 20 this is exactly rows 0:115, columns 10:150.
    crop_bottom = IMAGE_HEIGHT_PX - CROP_HEIGHT_PX
    crop_left = CROP_WIDTH_PX // 2
    crop_right = crop_left + (IMAGE_WIDTH_PX - CROP_WIDTH_PX)
    pixels = pixels[0:crop_bottom, crop_left:crop_right]

    if augment:
        # NOTE(review): the meaning of the second argument (10) is defined in
        # the helper module -- confirm before tuning it.
        pixels = hp.Augmentation.black_spots(pixels, 10)

    return hp.Preprocess.start(pixels)


def get_data(stage, augment=True):
    """Load all image pairs and their position labels for one stage.

    For every folder in ``TRAIN_DATA_FOLDER`` the file
    ``<RESSOURCES_PATH>/<folder>/<stage>/scene_results.json`` is read. Each
    entry must provide exactly two ``imagePaths`` (relative to the folder) and
    a ``positions`` label; entries that do not, or whose images cannot be
    opened, are skipped so that images and labels always stay aligned.

    Args:
        stage: Name of the stage sub-folder, e.g. "Train", "Verify" or "Test".
        augment: Apply ``hp.Augmentation.black_spots`` to every image. Defaults
            to True, which matches the previous behaviour for all stages.
            NOTE(review): augmentation is normally not wanted for
            validation/test data -- consider passing ``augment=False`` there.

    Returns:
        ``[images, labels]`` where ``images`` is ``np.array`` of the image
        pairs (two preprocessed images per sample) and ``labels`` is
        ``np.array`` of the raw ``positions`` entries.

    Raises:
        OSError: If a ``scene_results.json`` file cannot be opened.
        KeyError: If an entry has no ``imagePaths`` key.
    """
    labels = []
    images = []

    for train_folder in TRAIN_DATA_FOLDER:
        bundle_path = os.path.join(RESSOURCES_PATH, train_folder)
        scene_results_path = os.path.join(bundle_path, stage, JSON_NAME)

        if IN_DEBUG_MODE:
            print("CURRENT STAGE: " + stage + "\n")
            print("READING SCENE RESULTS AT: " + scene_results_path)

        with open(scene_results_path, 'r') as file:
            scene_results = json.load(file)

        for result in scene_results:
            image_paths = result["imagePaths"]

            # The model has exactly two input branches.
            if len(image_paths) != 2:
                continue

            # Without a label the sample is useless; skip it before doing any
            # image work.
            if "positions" not in result:
                print(f"Skipping scene without 'positions' label: {image_paths}")
                continue

            img = []
            pair_complete = True

            for img_path in image_paths:

                if IN_DEBUG_MODE:
                    print("READING IMAGE AT: " + img_path)

                try:
                    opened = Image.open(os.path.join(bundle_path, img_path))
                except OSError:
                    # Missing or unreadable file (FileNotFoundError and
                    # Pillow's UnidentifiedImageError are both OSError).
                    # The whole pair is discarded, so stop right away.
                    pair_complete = False
                    break

                # The context manager closes the underlying file handle once
                # the image has been processed.
                with opened:
                    img.append(_preprocess_image(opened, augment))

            if pair_complete:
                images.append(img)
                labels.append(result["positions"])

        if IN_DEBUG_MODE:
            print("\n\n")

    return [np.array(images), np.array(labels)]

In [3]:
# Base-Model generation
#
# Two-input CNN: both images of a sample run through the SAME convolutional
# layers (shared weights), the flattened features are concatenated and a dense
# head predicts one class distribution per position.
LOSS_FUNCTION = 'categorical_crossentropy'

# Shape of one cropped input image.
input_shape = (IMAGE_HEIGHT_PX - CROP_HEIGHT_PX, IMAGE_WIDTH_PX - CROP_WIDTH_PX, NUM_CHANNELS)

input_branch_1 = keras.layers.Input(shape=input_shape)
input_branch_2 = keras.layers.Input(shape=input_shape)

# Shared convolutional layers for image processing. The layer objects are
# created once and applied to both inputs, so both branches use the same weights.
convolutional_layers = [
    keras.layers.Conv2D(32, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten()
]

# Process first image
x1 = input_branch_1
for layer in convolutional_layers:
    x1 = layer(x1)

# Process second image
x2 = input_branch_2
for layer in convolutional_layers:
    x2 = layer(x2)

x = keras.layers.Concatenate(axis=-1)([x1, x2])
x = keras.layers.Dense(256, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)

# One logit per (position, class) pair. The softmax must be applied AFTER the
# reshape and along the class axis: a softmax over the flat 8 * 4 vector would
# make all 32 values sum to 1 instead of giving every position its own
# probability distribution over the classes.
x = keras.layers.Dense(NUM_POSITIONS * NUM_CLASSES)(x)
x = keras.layers.Reshape((NUM_POSITIONS, NUM_CLASSES))(x)
output = keras.layers.Softmax(axis=-1)(x)

# Build the model with the two input branches and the output layer
base_model = keras.models.Model(inputs=[input_branch_1, input_branch_2], outputs=output)

optimizer = keras.optimizers.legacy.Adam(learning_rate=0.001)

base_model.compile(optimizer=optimizer, loss=LOSS_FUNCTION, metrics=['accuracy', 'mean_squared_error'])

In [33]:

# Base model with VGG16
#
# Alternative to the custom CNN: a frozen, ImageNet-pretrained VGG16 extracts
# features from both images; only the dense head on top is trained.

# Shape of one cropped input image (ImageNet weights require 3 channels).
input_shape = (IMAGE_HEIGHT_PX - CROP_HEIGHT_PX, IMAGE_WIDTH_PX - CROP_WIDTH_PX, 3)

# Input layers for the two images
input_branch_1 = keras.layers.Input(shape=input_shape)
input_branch_2 = keras.layers.Input(shape=input_shape)

# Load the pre-trained VGG16 model (without the top layer). It gets its own
# name so that `base_model` only ever refers to the final two-input model.
# NOTE(review): the ImageNet weights were trained on inputs prepared with
# keras.applications.vgg16.preprocess_input, while this notebook divides the
# images by NORMALIZE_VALUE -- confirm that this mismatch is intended.
vgg16_backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers so that only the new head is trained
for layer in vgg16_backbone.layers:
    layer.trainable = False

# Extract features from both images using the same VGG16 backbone
x1 = vgg16_backbone(input_branch_1)
x2 = vgg16_backbone(input_branch_2)

# Concatenate the extracted features along the channel axis
x = keras.layers.Concatenate(axis=-1)([x1, x2])

# Task-specific head
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
# x = keras.layers.Dropout(0.3)(x)

# One logit per (position, class) pair. The softmax is applied after the
# reshape and along the class axis so that every position gets its own
# probability distribution; a softmax over the flat 8 * 4 vector would make
# all 32 values sum to 1 instead.
x = keras.layers.Dense(NUM_POSITIONS * NUM_CLASSES)(x)
x = keras.layers.Reshape((NUM_POSITIONS, NUM_CLASSES))(x)
output = keras.layers.Softmax(axis=-1)(x)

# Create the final model with two inputs and one output
base_model = keras.models.Model(inputs=[input_branch_1, input_branch_2], outputs=output)

# Compile the model
base_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mean_squared_error'])

In [2]:

# Base model with RESNET
#
# Alternative to the custom CNN: a frozen, ImageNet-pretrained ResNet50
# extracts features from both images; only the dense head on top is trained.

# Shape of one cropped input image (ImageNet weights require 3 channels).
input_shape = (IMAGE_HEIGHT_PX - CROP_HEIGHT_PX, IMAGE_WIDTH_PX - CROP_WIDTH_PX, 3)

# Input layers for the two images
input_branch_1 = keras.layers.Input(shape=input_shape)
input_branch_2 = keras.layers.Input(shape=input_shape)

# Load the pre-trained ResNet50 model (without the top layer). It gets its own
# name so that `base_model` only ever refers to the final two-input model.
# NOTE(review): the ImageNet weights were trained on inputs prepared with
# keras.applications.resnet50.preprocess_input, while this notebook divides the
# images by NORMALIZE_VALUE -- confirm that this mismatch is intended.
resnet50_backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers so that only the new head is trained
for layer in resnet50_backbone.layers:
    layer.trainable = False

# Extract features from both images using the same ResNet50 backbone
x1 = resnet50_backbone(input_branch_1)
x2 = resnet50_backbone(input_branch_2)

# Concatenate the extracted features along the channel axis
x = keras.layers.Concatenate(axis=-1)([x1, x2])

# Task-specific head
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
# x = keras.layers.Dropout(0.3)(x)

# One logit per (position, class) pair. The softmax is applied after the
# reshape and along the class axis so that every position gets its own
# probability distribution; a softmax over the flat 8 * 4 vector would make
# all 32 values sum to 1 instead.
x = keras.layers.Dense(NUM_POSITIONS * NUM_CLASSES)(x)
x = keras.layers.Reshape((NUM_POSITIONS, NUM_CLASSES))(x)
output = keras.layers.Softmax(axis=-1)(x)

# Create the final model with two inputs and one output
base_model = keras.models.Model(inputs=[input_branch_1, input_branch_2], outputs=output)

# Compile the model
base_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mean_squared_error'])

In [13]:
# Re-train existing model
# Optional alternative to building a fresh model: replaces `base_model` with
# the model previously stored at MODEL_PATH/MODEL_NAME so that training
# continues from its saved state.
base_model = keras.models.load_model(os.path.join(MODEL_PATH, MODEL_NAME))

In [5]:
# Load the training and validation sets. Each result is a two-element list
# [images, labels] as returned by get_data for the given stage folder.
train_data = get_data("Train")
verify_data = get_data("Verify")

In [6]:
# Train model

# Upper bound for the number of training epochs and debug switch for the
# shape printout in fit_model.
EPOCS = 50
IN_DEBUG_MODE = False

# Stop training once val_loss has not improved by at least 0.01 for 5 epochs
# and roll the model back to the best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.01,
    patience=5,
    restore_best_weights=True,
)

# Halve the learning rate when val_loss has not improved for 2 epochs.
learning_rate_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
)

# Write training logs for TensorBoard to ./logs.
tensorboard_callback = TensorBoard(log_dir='./logs')

def _prepare_split(data):
    """Convert one ``[images, labels]`` pair into normalized images and one-hot labels."""
    images = normalize_images(np.array(data[0]))
    numeric_labels = np.array([map_labels_to_nummeric(entry) for entry in data[1]])
    one_hot_labels = keras.utils.to_categorical(numeric_labels, num_classes=NUM_CLASSES)
    return images, one_hot_labels


def fit_model(model, train_data, verify_data):
    """Train ``model`` on the training set and validate on the verify set.

    Images are normalized with ``normalize_images``; labels are mapped to class
    indices and one-hot encoded with ``NUM_CLASSES`` classes. Training runs for
    at most ``EPOCS`` epochs with the module-level early-stopping,
    learning-rate and TensorBoard callbacks.

    Args:
        model: Compiled Keras model with two image inputs.
        train_data: ``[images, labels]`` used for training; ``images`` holds
            two images per sample (index 0 and 1 on the second axis).
        verify_data: ``[images, labels]`` used as validation data.

    Returns:
        The same ``model`` object after fitting.
    """
    train_images, train_labels = _prepare_split(train_data)
    verify_images, verify_labels = _prepare_split(verify_data)

    if IN_DEBUG_MODE:
        print("----- SHAPES ------\n")
        print(f"Train labels shape: {train_labels.shape}")
        print(f"Train images shape: {train_images.shape}")

        print(f"Verify labels shape: {verify_labels.shape}")
        print(f"Verify images shape: {verify_images.shape}\n\n")

    # Each branch of the model receives one image of every pair.
    train_inputs = [train_images[:, 0], train_images[:, 1]]
    verify_inputs = [verify_images[:, 0], verify_images[:, 1]]

    model.fit(
        train_inputs,
        train_labels,
        epochs=EPOCS,
        validation_data=(verify_inputs, verify_labels),
        verbose=1,
        callbacks=[early_stopping, learning_rate_scheduler, tensorboard_callback],
    )

    return model

# Run the training. fit_model returns the model object it was given, so
# `trained_model` and `base_model` refer to the same (now fitted) model.
trained_model = fit_model(base_model, train_data, verify_data)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


In [None]:
# Experiment notes: model version --> hyperparameters used
# v6 --> Dropout 20%, L2: 0.02
# v7 --> Dropout 30%, L2: 0.002
# v8 --> Dropout 0%, L2: 0.002

In [7]:
# Save trained model
# Make sure the target directory exists first; otherwise saving fails on a
# fresh checkout where ../tmp/models has not been created yet and the result
# of a long training run would be lost.
os.makedirs(MODEL_PATH, exist_ok=True)
trained_model.save(os.path.join(MODEL_PATH, MODEL_NAME))

In [13]:
# Load model
# Reads the stored model from MODEL_PATH/MODEL_NAME so that the evaluation
# below works on the persisted file rather than on the in-memory training result.
model = keras.models.load_model(os.path.join(MODEL_PATH, MODEL_NAME))

In [14]:
# Load the test set as [images, labels] from the "Test" stage folders.
test_data = get_data("Test")

In [15]:
# Test model
# Spot check: predict the whole test set, then print prediction and ground
# truth for ONE randomly chosen sample. No aggregate metric is computed here.

# Ground-truth class indices, one list of indices per test sample.
test_labels = np.array([map_labels_to_nummeric(label) for label in test_data[1]])

if IN_DEBUG_MODE:
    model.summary()

# Same normalization as during training; the two images of every pair are fed
# to the two model inputs.
test_images = normalize_images(np.array((test_data[0])))
predictions = model.predict([test_images[:, 0], test_images[:, 1]])

# random.randint includes both bounds, hence len - 1. The random module is not
# seeded here, so a different sample is shown on every run.
label_index = random.randint(0, len(test_labels)-1)

print("\n\n")
# The printed index is 1-based (label_index + 1).
print(f"------ PREDICTION: Index {label_index + 1} --------\n")
# Most likely class per entry along the last axis
# (presumably shape (samples, NUM_POSITIONS, NUM_CLASSES) -- confirm for the loaded model).
predicted_nummeric = np.argmax(predictions, axis=-1)
# Translate class indices back to colour names via label_mapping.
predicted_readable = np.vectorize(label_mapping.get)(predicted_nummeric)
actual_readable = np.vectorize(label_mapping.get)(test_labels)


print("NUMMERIC: \n")
print(predicted_nummeric[label_index])
print("READABLE: \n")
print(predicted_readable[label_index])
print("\n\n")

print(f"------ ACTUAL: Index {label_index + 1} ------ \n")
print("NUMMERIC: \n")
print(test_labels[label_index])
print("READABLE: \n")
print(actual_readable[label_index])




------ PREDICTION: Index 818 --------

NUMMERIC: 

[1 1 2 3 3 0 3 3]
READABLE: 

['yellow' 'yellow' 'blue' '' '' 'red' '' '']



------ ACTUAL: Index 818 ------ 

NUMMERIC: 

[1 1 2 3 3 0 3 3]
READABLE: 

['yellow' 'yellow' 'blue' '' '' 'red' '' '']
