In [4]:
import tensorflow as tf
from mtcnn import MTCNN
from pathlib import Path
import pandas as pd
import glob
import cv2
import csv
import os
import ast
import pydot
import pydotplus
import graphviz
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import time

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback

In [63]:
# ===================== MULTITASK MODEL SETUP =====================
# A shared convolutional encoder produces a 256-d embedding that feeds three
# task heads:
#   * landmark_output              -> 10 values
#   * previous_illuminance_output  -> 1 value
#   * image_retinex_output         -> 224x224x3 image decoded from the embedding

input_layer = tf.keras.layers.Input(shape=(224, 224, 3), name='input_layer')

# First Convolutional Block
conv1 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', name='conv1')(input_layer)
maxpool1 = tf.keras.layers.MaxPooling2D((2, 2), name='maxpool1')(conv1)
dropout1 = tf.keras.layers.Dropout(0.2, name='dropout1')(maxpool1)

# Second Convolutional Block
conv2 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', name='conv2')(dropout1)
maxpool2 = tf.keras.layers.MaxPooling2D((2, 2), name='maxpool2')(conv2)
dropout2 = tf.keras.layers.Dropout(0.2, name='dropout2')(maxpool2)

# Third Convolutional Block
conv3 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', name='conv3')(dropout2)
maxpool3 = tf.keras.layers.MaxPooling2D((2, 2), name='maxpool3')(conv3)
dropout3 = tf.keras.layers.Dropout(0.2, name='dropout3')(maxpool3)

# Flatten Layer
flatten = tf.keras.layers.Flatten(name='flatten')(dropout3)

# Embedding Layer (shared by all three heads)
embedding = tf.keras.layers.Dense(256, activation='relu', name='embedding')(flatten)
dropout4 = tf.keras.layers.Dropout(0.2, name='dropout4')(embedding)

# Landmarks Layers
landmarksDense_1 = tf.keras.layers.Dense(64, activation='relu', name='landmarksDense_1')(dropout4)
landmarksDropout1 = tf.keras.layers.Dropout(0.2, name='landmarksDropout1')(landmarksDense_1)
landmarksDense_2 = tf.keras.layers.Dense(64, activation='relu', name='landmarksDense_2')(landmarksDropout1)
landmarksDropout2 = tf.keras.layers.Dropout(0.2, name='landmarksDropout2')(landmarksDense_2)
landmarks = tf.keras.layers.Dense(10, activation='relu', name='landmark_output')(landmarksDropout2)

# Illuminance Layers
illuminanceDense_1 = tf.keras.layers.Dense(64, activation='relu', name='illuminanceDense_1')(dropout4)
illuminanceDropout1 = tf.keras.layers.Dropout(0.2, name='illuminanceDropout1')(illuminanceDense_1)
# The layer must be *called* on the previous tensor; without the call the name
# is bound to a Layer object and the following Dropout cannot be applied to it.
illuminanceDense_2 = tf.keras.layers.Dense(64, activation='relu', name='illuminanceDense_2')(illuminanceDropout1)
illuminanceDropout2 = tf.keras.layers.Dropout(0.2, name='illuminanceDropout2')(illuminanceDense_2)
illum = tf.keras.layers.Dense(1, activation='relu', name='previous_illuminance_output')(illuminanceDropout2)

# Retinex Layers
# NOTE: dropout1..dropout3 are rebound below to the decoder's dropout tensors;
# after this cell those names no longer refer to the encoder tensors.

# Reshape the 256-d embedding into a 4x4x16 feature map (4 * 4 * 16 == 256)
reshape = tf.keras.layers.Reshape((4, 4, 16), name='reshape')(dropout4)

# Deconvolution Block 1: 4x4 -> 5x5 (2x2 kernel, default 'valid' padding) -> 10x10
deconv1 = tf.keras.layers.Conv2DTranspose(128, (2, 2), activation='relu', name='deconv1')(reshape)
dropout1 = tf.keras.layers.Dropout(0.2, name='retinexDropout1')(deconv1)
upsample1 = tf.keras.layers.UpSampling2D((2, 2), name='upsample1')(dropout1)

# Deconvolution Block 2: 10x10 -> 13x13 (4x4 kernel) -> 26x26
deconv2 = tf.keras.layers.Conv2DTranspose(64, (4, 4), activation='relu', name='deconv2')(upsample1)
dropout2 = tf.keras.layers.Dropout(0.2, name='retinexDropout2')(deconv2)
upsample2 = tf.keras.layers.UpSampling2D((2, 2), name='upsample2')(dropout2)

# Deconvolution Block 3: 26x26 -> 32x32 (7x7 kernel) -> 224x224 (x7 upsampling)
deconv3 = tf.keras.layers.Conv2DTranspose(32, (7, 7), activation='relu', name='deconv3')(upsample2)
dropout3 = tf.keras.layers.Dropout(0.2, name='retinexDropout3')(deconv3)
upsample3 = tf.keras.layers.UpSampling2D((7, 7), name='upsample3')(dropout3)

# Conv2D layer for final output: 'same' padding keeps 224x224, 3 output channels
retIllum = tf.keras.layers.Conv2D(3, kernel_size=(3, 3), activation='relu', padding='same', name='image_retinex_output')(upsample3)

# Output order here defines the order of the targets a data generator must yield
# when it supplies them as a list.
task_outputs = [landmarks, illum, retIllum]

multi_task_model = tf.keras.Model(inputs=input_layer, outputs=task_outputs)

# Compile the model with specific loss functions and metrics for each task;
# the dictionary keys are the `name=` values of the three output layers.
multi_task_model.compile(
    optimizer=Adam(learning_rate=1e-6),
    loss={
        'landmark_output': 'mean_squared_error',
        'previous_illuminance_output': 'mean_squared_error',
        'image_retinex_output': 'mean_squared_error'
    },
    metrics={
        'landmark_output': ['mse', "mae"],
        'previous_illuminance_output': ['mse', "mae"],
        'image_retinex_output': ['mse', "mae"]
    }
)

# Summary of the multi-task model
multi_task_model.summary()

Model: "model_36"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1 (Conv2D)                 (None, 222, 222, 64  1792        ['input_layer[0][0]']            
                                )                                                                 
                                                                                                  
 maxpool1 (MaxPooling2D)        (None, 111, 111, 64  0           ['conv1[0][0]']                  
                                )                                                          

In [64]:
# ===================== MULTI-TASK DATA GEN =====================

class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``(X, [landmarks, illuminance, retinex])`` batches.

    Each row of ``df`` describes one sample. Column names are looked up through
    two mapping dictionaries:

    * ``X_col['path']``                - column with the input image path
    * ``y_col['coordinates']``         - column with a string holding a dict
      literal keyed by ``left_eye``, ``right_eye``, ``nose``, ``mouth_left``
      and ``mouth_right``
    * ``y_col['illuminance']``         - column convertible with ``float()``
    * ``y_col['adjusted_image_path']`` - column with the target image path

    Args:
        df: DataFrame of samples; it is copied, so the caller's frame is never
            reordered by shuffling.
        X_col: mapping described above.
        y_col: mapping described above.
        batch_size: number of rows per batch.
        input_size: ``(height, width, channels)``; only the first two entries
            are used, as the resize target for both input and target images.
        shuffle: reshuffle the rows at the end of every epoch.
        random_seed: passed as ``random_state`` to ``DataFrame.sample`` on every
            reshuffle.
        landmark_image_size: ``(width, height)`` used to normalise landmark
            coordinates. Defaults to the previously hard-coded ``(640, 480)``.
    """

    def __init__(self, df, X_col, y_col,
                 batch_size,
                 input_size=(224, 224, 3),
                 shuffle=True,
                 random_seed=None,
                 landmark_image_size=(640, 480)):
        # Let the Keras base class initialise its own state (newer Keras
        # versions warn when a Sequence subclass skips this call).
        super().__init__()

        self.df = df.copy()
        self.X_col = X_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle
        self.random_seed = random_seed
        self.landmark_image_size = landmark_image_size

        self.n = len(self.df)
        self.n_coords = 2  # Assuming landmark coordinates are 2-dimensional
        self.n_illuminance = 1  # Assuming a single illuminance value

    def on_epoch_end(self):
        """Reshuffle the rows after each epoch when ``shuffle`` is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1, random_state=self.random_seed).reset_index(drop=True)

    def __get_input(self, path, target_size):
        """Load the image at ``path``, resize it and divide pixel values by 255."""
        image = tf.keras.preprocessing.image.load_img(path)
        image_arr = tf.keras.preprocessing.image.img_to_array(image)

        image_arr = tf.image.resize(image_arr, (target_size[0], target_size[1])).numpy()

        return image_arr / 255.

    def __get_output(self, label, output_type):
        """Convert one raw label into its training target.

        Args:
            label: raw cell value from the DataFrame.
            output_type: ``'coordinates'``, ``'illuminance'`` or
                ``'adjusted_image_path'``.

        Returns:
            A float32 array of 10 normalised coordinates, a float, or an image
            array, depending on ``output_type``.

        Raises:
            ValueError: if the landmark label does not flatten to exactly 10
                values, or if ``output_type`` is not one of the three above.
        """
        if output_type == 'coordinates':
            # literal_eval only accepts Python literals, so the label string is
            # parsed without executing arbitrary code.
            coordinates_dict = ast.literal_eval(label)

            # Fixed landmark order -> fixed position of each value in the target.
            landmarks = ['left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right']
            coordinates_array = np.array(
                [coord for landmark in landmarks for coord in coordinates_dict[landmark]]
            )

            # Validate before normalising: normalize_coordinates walks the array
            # in (x, y) pairs and would fail with an unrelated IndexError on an
            # odd-length array.
            if len(coordinates_array) != 10:
                raise ValueError("Expected 10 coordinates, but found {}".format(len(coordinates_array)))

            width, height = self.landmark_image_size
            return normalize_coordinates(coordinates_array, width, height)
        elif output_type == 'illuminance':
            return float(label)
        elif output_type == 'adjusted_image_path':
            # The target image goes through the same loading/resizing as the input.
            return self.__get_input(label, self.input_size)

        # Fail loudly instead of silently returning None into a batch.
        raise ValueError("Unknown output_type: {!r}".format(output_type))

    def __get_data(self, batches):
        """Build the arrays for one batch from a slice of the DataFrame."""
        path_batch = batches[self.X_col['path']]

        coords_batch = batches[self.y_col['coordinates']]
        illuminance_batch = batches[self.y_col['illuminance']]
        adjusted_image_path_batch = batches[self.y_col['adjusted_image_path']]

        X_batch = np.asarray([self.__get_input(x, self.input_size) for x in path_batch])

        y0_batch = np.asarray([self.__get_output(y, 'coordinates') for y in coords_batch])
        y1_batch = np.asarray([self.__get_output(y, 'illuminance') for y in illuminance_batch])
        y2_batch = np.asarray([self.__get_output(y, 'adjusted_image_path') for y in adjusted_image_path_batch])

        # Targets are returned as [coordinates, illuminance, adjusted image].
        return X_batch, [y0_batch, y1_batch, y2_batch]

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, [y0, y1, y2])``."""
        start = index * self.batch_size
        # .iloc makes the positional row slicing explicit.
        batches = self.df.iloc[start:start + self.batch_size]
        X, y = self.__get_data(batches)

        return X, y

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return self.n // self.batch_size

def normalize_coordinates(coordinates, original_width, original_height):
    """Scale interleaved ``x, y`` values by the given width and height.

    Entries at even positions are divided by ``original_width`` and entries at
    odd positions by ``original_height``. The input is read in pairs, so an
    odd-length input fails with ``IndexError`` when the missing partner is read.

    Returns:
        A new ``float32`` array with the same shape as ``coordinates``.
    """
    scaled = np.zeros_like(coordinates, dtype=np.float32)

    for x_pos in range(0, len(coordinates), 2):
        y_pos = x_pos + 1
        # Read both members of the pair before writing anything for it.
        x_value, y_value = coordinates[x_pos], coordinates[y_pos]

        scaled[x_pos] = x_value / original_width
        scaled[y_pos] = y_value / original_height

    return scaled

In [73]:
# ===================== DATA GEN SETUP =====================

# Training split: read the CSV and prefix both path columns with the directory
# that holds the corresponding images.
train_df = pd.read_csv("./data/training.csv").assign(
    Filename=lambda frame: "./data/Training/" + frame["Filename"],
    Retinex=lambda frame: "./data/RetTraining/retinex_results_train/" + frame["Retinex"],
)

# Column-name mappings consumed by CustomDataGen (input path / three targets)
X_col = dict(path='Filename')
y_col = dict(illuminance='Illuminance', coordinates='Landmarks', adjusted_image_path='Retinex')

train_gen = CustomDataGen(
    df=train_df,
    X_col=X_col,
    y_col=y_col,
    batch_size=32,
    input_size=(224, 224, 3),
    random_seed=438,
)

# Evaluation split: same treatment with the evaluation directories.
eval_df = pd.read_csv("./data/evaluation.csv").assign(
    Filename=lambda frame: "./data/Evaluation/" + frame["Filename"],
    Retinex=lambda frame: "./data/RetEvaluation/retinex_results_eval/" + frame["Retinex"],
)

eval_X_col = dict(path='Filename')
eval_y_col = dict(illuminance='Illuminance', coordinates='Landmarks', adjusted_image_path='Retinex')

val_gen = CustomDataGen(
    df=eval_df,
    X_col=eval_X_col,
    y_col=eval_y_col,
    batch_size=32,
    input_size=(224, 224, 3),
    random_seed=438,
)
# print(eval_df)
# print(train_df)

In [74]:
class DynamicLearningRateScheduler(Callback):
    """Lower the optimizer learning rate when a monitored metric stops improving.

    After ``patience`` consecutive epochs in which the metric is not strictly
    lower than the best value seen so far, the learning rate is multiplied by
    ``factor``, but never set below ``min_lr``.

    Args:
        monitor_metric: key to read from the epoch ``logs`` dictionary.
        patience: number of non-improving epochs before the rate is lowered.
        factor: multiplier applied to the current learning rate.
        min_lr: floor for the learning rate.
    """

    def __init__(self, monitor_metric='val_loss', patience=3, factor=0.5, min_lr=1e-6):
        super().__init__()
        self.monitor_metric = monitor_metric
        self.patience = patience
        self.factor = factor
        self.min_lr = min_lr
        self.wait = 0  # consecutive epochs without improvement
        self.best_metric = float('inf')

    def on_epoch_end(self, epoch, logs=None):
        """Update the plateau counter and reduce the learning rate if needed.

        Raises:
            ValueError: if ``monitor_metric`` is not present in ``logs``.
        """
        # `logs` defaults to None; treat that like an empty dict so the missing
        # metric is reported by the ValueError below rather than an AttributeError.
        logs = logs or {}
        current_metric = logs.get(self.monitor_metric)

        if current_metric is None:
            raise ValueError(f"Metric {self.monitor_metric} not found in training logs.")

        if current_metric < self.best_metric:
            self.best_metric = current_metric
            self.wait = 0
            return

        self.wait += 1
        if self.wait < self.patience:
            return

        # Patience exhausted: restart the counter whether or not the rate changes.
        self.wait = 0

        # Read the rate as a plain Python float and write it back through the
        # backend helpers, so the arithmetic and the printed message use a
        # number rather than a tensor.
        optimizer = self.model.optimizer
        current_lr = float(tf.keras.backend.get_value(optimizer.lr))
        new_lr = max(current_lr * self.factor, self.min_lr)

        # Only act (and report) when the rate actually goes down; once the floor
        # is reached there is nothing left to reduce.
        if new_lr < current_lr:
            tf.keras.backend.set_value(optimizer.lr, new_lr)
            print(f"\nLearning rate reduced to {new_lr}.")


dynamicLearningCallback = DynamicLearningRateScheduler(monitor_metric='val_loss', patience=3, factor=0.5, min_lr=1e-8)

In [75]:
# Train the multi-task model for 20 epochs, validating on val_gen and letting
# the plateau callback adjust the learning rate.
history = multi_task_model.fit(
    x=train_gen,
    validation_data=val_gen,
    epochs=20,
    callbacks=[dynamicLearningCallback],
)

Epoch 1/20
 48/940 [>.............................] - ETA: 25:21 - loss: 0.6375 - landmark_output_loss: 0.2040 - previous_illuminance_output_loss: 0.2361 - image_retinex_output_loss: 0.1974 - landmark_output_mse: 0.2040 - landmark_output_mae: 0.3804 - previous_illuminance_output_mse: 0.2361 - previous_illuminance_output_mae: 0.2581 - image_retinex_output_mse: 0.1974 - image_retinex_output_mae: 0.3741

KeyboardInterrupt: 