Disclaimer:
This is the final master notebook, it is not meant to be able to run as a whole as all of us used our own drive links and ran them separately. The actual run of the individual steps are available in the other files in the git repo.

# Data Preprocessing

Before doing any actual training, we have to make sure the data is up to our standards to ensure the best possible accuracy.

## Enhanced labeling of real photos
[Link to GitHub where it showcases some runs of the codes below.](https://github.com/Frostforus/DTU_Deep_Learning_Image_Segmentation_Project/blob/farkas/process_real_imageset.ipynb)

The real-world data we were provided had several data quality issues. The biggest issue was that the masks were incorrect. We only found this out after looking through the initial numpy arrays that were provided. Using the images, we found a way to retrieve the masks from the colored part overlays in the JPG images.

In [None]:
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import colorsys
import scipy

# Folder with the JPG photos that are read to rebuild the masks
image_folder = '../carseg_data/images/photo/with_segmentation/'
# Folder with the originally provided arrays (photo_XXXX.npy)
array_folder = '../carseg_data/arrays/'
# Output folder for the arrays with the rebuilt masks
new_array_folder = 'data/new_arrays/'

### Finding the right HSV thresholds
In order to correctly detect the colored areas on the jpg images we had to find the corresponding color thresholds. This was done by first visualizing histograms of the pixel values in the images. With the detected color values, we tested multiple color thresholds, by visualizing the results.

In [None]:
# Plot a histogram of the hue values of one image and show the colours that
# belong to the most populated hue bins, so each peak can be linked to a part.

image_number = 1  # which image to use from the imageset
colors_to_show = 20  # how many colours to visualize based on the hue peaks

image_path = os.path.join(image_folder, str(image_number).zfill(4)+'.jpg')
im = cv2.imread(image_path)
if im is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError(image_path)
arr = np.load(os.path.join(array_folder, 'photo_'+str(image_number).zfill(4)+'.npy'))

# make the pixels labeled as background black
im[arr[..., -1] == 0] = [0, 0, 0]

cv2.imshow('image', im)
cv2.waitKey(0)
cv2.destroyAllWindows()

im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)  # convert the image to hsv

# Hue 0 is dropped so the blacked-out background does not dominate the histogram
hues = im[..., 0].ravel()
hues = hues[hues != 0]

# plot the histogram with 100 bins
plt.hist(hues, bins=100)
plt.show()

# calculate the number of elements in each bin
hist, bin_edges = np.histogram(hues, bins=100)

# bin indices from the most to the least populated bin (sorted only once)
peak_bins = hist.argsort()[::-1][:colors_to_show]

print('Peak H values:')
for bin_index in peak_bins:

    # hue at the centre of the current bin
    h = round((bin_edges[bin_index] + bin_edges[bin_index + 1]) / 2)
    print(h)

    # plot a square of the color
    # OpenCV stores 8-bit hue as degrees / 2, i.e. in the range [0, 180)
    sample_color = (np.array(colorsys.hsv_to_rgb(h / 180, 1, 1)) * 255).astype(int)
    print(sample_color)
    sample_im = np.full((1, 1, 3), sample_color, dtype=int)
    plt.imshow(sample_im)
    plt.show()

In [None]:
image_number = 48  # which image to use from the imageset

# use only the pixels at this hue value with this threshold
color_h = 60
color_h_threshold = 5

im = cv2.imread(os.path.join(image_folder, str(image_number).zfill(4)+'.jpg'))
im_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)

# Pixels whose hue lies within color_h +/- color_h_threshold (bounds inclusive).
# The hue is widened to int so the subtraction cannot wrap around in uint8.
hue = im_hsv[..., 0].astype(int)
in_hue_range = np.abs(hue - color_h) <= color_h_threshold

# Black out everything else so only the selected colour stays visible
im[~in_hue_range] = [0, 0, 0]

# Collect the value (V) channel of the selected pixels.
# This way we will be able to differentiate between light / dark green and pink / purple
v_list = im_hsv[..., 2][in_hue_range]

# Plot the values on a histogram
plt.hist(v_list, bins=100)
plt.show()

cv2.imshow('image', im)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
image_number = 1  # which image to use from the imageset
color_h = 60
color_h_threshold = 5

im = cv2.imread(os.path.join(image_folder, str(image_number).zfill(4)+'.jpg'))
im_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)

# Pixels whose hue lies within color_h +/- color_h_threshold (bounds inclusive).
# The hue is widened to int so the subtraction cannot wrap around in uint8.
hue = im_hsv[..., 0].astype(int)
in_hue_range = np.abs(hue - color_h) <= color_h_threshold

# Black out everything else so only the selected colour stays visible
im[~in_hue_range] = [0, 0, 0]

# Collect the saturation (S) channel of the selected pixels.
# This way we will be able to eliminate almost all false detection from the rest of the car
v_list = im_hsv[..., 1][in_hue_range]

# Plot the saturations on a histogram
plt.hist(v_list, bins=100)
plt.show()

cv2.imshow('image', im)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Creating the new masks
Based on the thresholds found by our visualizations we can reconstruct the correct masks from the jpg images. Detected areas below a defined pixel size were removed to get rid of false detections. Furthermore, a morphological close was applied to the masks to fill the few undetected pixels in the parts.

In [None]:
# `import scipy` alone does not guarantee that the ndimage submodule is loaded
import scipy.ndimage

# minimal contiguous area (in pixels) to be detected as a part
min_mask_area = 40

# category labels, in the same order as the colour rules below
categories = [10, 20, 30, 40, 50, 60, 70, 80]

# (hue_min, hue_max, value_min, value_max) for each category, bounds inclusive.
# Every rule additionally requires the saturation to be at least min_saturation.
color_rules = [
    (10, 20, 100, 255),
    (55, 65, 70, 130),
    (25, 35, 100, 255),
    (85, 95, 100, 255),
    (145, 155, 100, 180),
    (55, 65, 130, 255),
    (115, 125, 100, 255),
    (145, 155, 181, 255),
]
min_saturation = 180

# 5x5 circular kernel used to fill the undetected pixels inside bigger areas
close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# np.save does not create missing folders
os.makedirs(new_array_folder, exist_ok=True)

# only the jpg files can be mapped to an image number
image_list = sorted(name for name in os.listdir(image_folder) if name.lower().endswith('.jpg'))

# go though all the jpg images
for image_name in image_list:
    im = cv2.imread(os.path.join(image_folder, image_name))

    image_number = int(image_name[:-4])
    arr = np.load(os.path.join(array_folder, 'photo_'+str(image_number).zfill(4)+'.npy'))

    # only the region covered by the array is labeled
    hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:arr.shape[0], :arr.shape[1]]
    hue, sat, val = hsv[..., 0], hsv[..., 1], hsv[..., 2]

    # pixels that the original mask does not label as background
    not_background = arr[..., -1] != 0

    # define the new array where we will store the results of our algorithm
    new_arr = arr.copy()
    new_arr[..., -1] = 0

    # Categories are written in order, so where two closed masks overlap the
    # later category wins.
    for category, (h_min, h_max, v_min, v_max) in zip(categories, color_rules):
        mask = (
            not_background
            & (hue >= h_min) & (hue <= h_max)
            & (sat >= min_saturation)
            & (val >= v_min) & (val <= v_max)
        )

        # remove the connected areas smaller than the predefined size
        labeled, _ = scipy.ndimage.label(mask)
        area_sizes = np.bincount(labeled.ravel())
        mask[area_sizes[labeled] < min_mask_area] = False

        # apply a morphological close to fill the undetected pixels in bigger areas
        closed = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, close_kernel)

        # write the category label used in the dataset
        new_arr[..., -1][closed.astype(bool)] = category

    # everything that is not background and matched no part is the rest of the car
    rest_of_car = not_background & (new_arr[..., -1] == 0)
    new_arr[..., -1][rest_of_car] = 90

    # save the newly created array
    np.save(os.path.join(new_array_folder, 'photo_'+str(image_number).zfill(4)+'.npy'), new_arr)

## Data Augmentation

[Link to Google colab where it showcases some runs.](https://colab.research.google.com/drive/1fslwnPv_Lq_hmmiZrmQg6VYrtTGMAyCM?usp=sharing)



Since the test images were real images, it was essential to include as many real car images in the training dataset as possible. Only 169 real images were provided (30 out of 169 were test images), thus we augmented the real images. We applied:
*   Rotation
*   Vertical
*   Dropout
*   Gaussian blur
*   Sharpening





### Function: apply image augmentation
This function loads each original array, applies different augmentation methods to it, and saves the transformed array.

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa
import cv2
import json
import tensorflow as tf
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
import imageio
import imgaug.imgaug
import time

####################################################
# AUGMENTATION
####################################################

# Seed NumPy's global random generator with the current Unix time, so values
# drawn from np.random differ between runs.
# NOTE(review): whether the imgaug augmenters are affected by this seed cannot
# be told from this cell — confirm.
np.random.seed(int(time.time()))

def augment_numpy_arrays(input_directory, file_names, output_directory):
    """Augment every array listed in *file_names* and save the results.

    Each array is loaded from ``input_directory + filename``. Its first three
    channels are used as the image and its last channel as the mask.
    Dropout, Gaussian blur, sharpening and brightness multiplication are
    applied to the image only; the 20 degree rotation and the horizontal flip
    are applied to both the image and the mask so they stay aligned.

    Args:
        input_directory: folder prefix of the input arrays (concatenated
            directly with the file name, so it must end with a separator).
        file_names: names of the ``.npy`` files to augment.
        output_directory: folder prefix the augmented arrays are saved to as
            ``"augmented8_" + filename``.
    """
    for filename in file_names:
        # Load the numpy array from the file path
        array = np.load(input_directory + filename)

        image = array[..., :3]
        # Keep a trailing channel axis so the mask can be concatenated later
        segmap = array[..., -1:]

        # Apply dropout
        dropout_rate = np.random.uniform(0.05, 0.08)
        augmented_rgb = iaa.Dropout([dropout_rate])(image=image)

        # Apply Gaussian blur (probability: 50%)
        augmented_rgb = iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 2)))(image=augmented_rgb)

        # Apply sharpening on top of the previous steps
        augmented_rgb = iaa.Sharpen((0.0, 0.1))(image=augmented_rgb)

        # Apply rotation; order=0 (nearest neighbour) keeps the mask labels intact
        rotation_angle = 20
        augmented_rgb, augmented_mask = iaa.Affine(
            rotate=rotation_angle, order=0, mode="constant", cval=0
        )(images=[augmented_rgb, segmap])

        # No elastic transformation is applied: the previous attempt referenced
        # undefined variables and would have warped the image and the mask
        # independently of each other.

        # Apply horizontal flip
        augmented_rgb, augmented_mask = iaa.Fliplr(1)(images=[augmented_rgb, augmented_mask])

        # Apply brightness multiplication
        augmented_rgb = iaa.Multiply((0.8, 1.1))(image=augmented_rgb)

        # Concatenate the RGB and mask arrays
        combined_array = np.concatenate([augmented_rgb, augmented_mask], axis=-1)

        # Save the array
        np.save(output_directory + "augmented8_" + filename, combined_array)

    print("Augmentation is done.")

### Input and output folders

In this section the input and output folders are defined for data augmentation. We take the arrays from the input folder, call the previously created augmentation function on them and save the results into the output folder.

Here the input folder is the improved arrays of the real car images

In [None]:
# Folder with the improved arrays of the real car images (input of the augmentation)
aug_input_path = '/content/drive/MyDrive/Saját/DTU/semester-3/Deep_Learning/DTU_Deep_Learning_Image_Segmentation_Project/data/new_arrays/'
# Folder the augmented arrays are written to
aug_output_path = '/content/drive/MyDrive/Saját/DTU/semester-3/Deep_Learning/DTU_Deep_Learning_Image_Segmentation_Project/data/augmented_arrays_rot20_flip/'
# NOTE(review): black_car_image_file_names is not defined before this cell in
# the visible notebook; augment_numpy_arrays concatenates the input folder with
# each entry, so the entries are expected to be bare file names — confirm.
augment_numpy_arrays(aug_input_path, black_car_image_file_names, aug_output_path)

## Adding background images to CAD images.
[Link to Google colab where it showcases some runs.](https://colab.research.google.com/drive/1EDB2RnDnX4fKyJiQDGjDHAg4gL0Xmnlj?usp=sharing)

In this section we add more realistic background images to the given CAD car images, so the model is given the chance to learn that the background can be different every time.

In [None]:
import os
import re
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

def filter_files_by_regex(directory, pattern):
    """Return the paths of the entries in *directory* whose name matches *pattern*.

    The pattern is matched from the start of the file name (``re.match``).
    The directory is joined with ``os.path.join``, so it works with or without
    a trailing path separator.

    Args:
        directory: folder to list.
        pattern: regular expression applied to each entry name.

    Returns:
        List of ``directory`` joined with each matching name, in the order
        returned by ``os.listdir`` (not sorted).
    """
    compiled = re.compile(pattern)
    return [
        os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if compiled.match(filename)
    ]

### Function: load car onto given background
This function takes the background image and puts the car onto it, shifted down a bit so it isn't flying. We know where the car is using both the masks given and the absolute value of the pixels. Sadly some of the training data given for the orange cars were unusable because of the masks provided, we didn't run this on such pictures.

In [None]:
def load_car_onto_background(car_path, background_path, y_offset=128):
    """Paste the car stored in *car_path* onto the image at *background_path*.

    The background is converted to RGB and resized to 256x256. Every pixel of
    the car array whose fourth channel is non-zero is copied, with all of its
    channels, onto the background, shifted down by *y_offset* rows. Rows that
    would fall below the image are dropped. The background, the car and the
    combined image are also displayed with matplotlib.

    Args:
        car_path: path of the ``.npy`` file holding the car array; its fourth
            channel (index 3) is used to decide which pixels belong to the car.
        background_path: path of the background image.
        y_offset: number of rows the car is shifted down (0 to 256).

    Returns:
        ``uint8`` array of shape (256, 256, 4): the background with the car
        pasted on it; the fourth channel is zero outside the car.
    """
    original_background = Image.open(background_path)

    # Force three channels so grayscale / RGBA / palette backgrounds also fit
    resized_image = original_background.convert('RGB').resize((256, 256))

    background_rgb = np.array(resized_image).astype(np.uint8)
    image_array = np.zeros((256, 256, 4), dtype=np.uint8)
    image_array[..., :3] = background_rgb

    # get car image from npy file
    car_array = np.load(car_path)

    # Only the rows that still fit after shifting down can be pasted
    car_region = car_array[:256 - y_offset, :256]
    is_car = car_region[..., 3] != 0
    target = image_array[y_offset:y_offset + car_region.shape[0], :car_region.shape[1]]
    target[is_car] = car_region[is_car]  # target is a view, so image_array is updated

    plt.imshow(background_rgb)
    plt.title('Background Image')
    plt.axis('off')
    plt.show()

    plt.imshow(car_array[..., :3])
    plt.title('Car Image')
    plt.axis('off')
    plt.show()

    plt.imshow(image_array[..., :3])
    plt.title('Car image with background')
    plt.axis('off')
    plt.show()
    return image_array

### Save images to drive
In this block we save the images to the drive, where it can be moved into the training dataset.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Folder with the road background photos
directory_path = 'drive/My Drive/___Deeplearning/backgrounds_roads/'
# Select names such as 12.jpg
regex_pattern = r'[0-9]*\.jpg'
# Sorted paths (folder + file name) of the background images
background_file_names = sorted(filter_files_by_regex(directory_path, regex_pattern))

In [None]:
# Folder with the provided car arrays
directory_path = 'drive/My Drive/___Deeplearning/arrays/'
# Select the arrays of the black 5-door car
regex_pattern = r'black_5_doors_[0-9]*\.npy'
# NOTE(review): despite the variable name, this list holds the black_5_doors
# arrays, not the photo arrays — confirm the intended name.
photos_nparray_file_names = sorted(filter_files_by_regex(directory_path, regex_pattern))

In [None]:
# Folder with the provided car arrays
directory_path = 'drive/My Drive/___Deeplearning/arrays/'
# Select the arrays of the orange 3-door car
regex_pattern = r'orange_3_doors_[0-9]*\.npy'
# Sorted paths (folder + file name) of the orange car arrays
orange_nparray_file_names = sorted(filter_files_by_regex(directory_path, regex_pattern))

In [None]:
def save_images_with_backgrounds(car_file_paths, background_file_paths,out_path='drive/My Drive/___Deeplearning/arrays/cars_with_backgrounds/',file_name="Black_with_background", max_load=10,shift =0, step=1):
    """Paste cars onto backgrounds and save each result as a ``.npy`` file.

    For every counter ``i`` starting at *shift*, the car at index ``i * step``
    is combined with the background at index ``i * step`` (wrapping around
    the background list) and saved to ``out_path + f"{file_name}_{i}.npy"``.
    Processing stops after *max_load* images, after as many images as there
    are cars, or as soon as ``i * step`` runs past the end of the car list.

    Args:
        car_file_paths: paths of the car arrays.
        background_file_paths: paths of the background images.
        out_path: folder prefix of the output files (must end with a separator).
        file_name: prefix of the output file names.
        max_load: maximum number of images to create.
        shift: first counter value; also offsets the output file numbering.
        step: stride used when picking cars and backgrounds.
    """
    n_cars = len(car_file_paths)
    for i in range(shift, shift + min(n_cars, max_load)):
        car_index = i * step
        if car_index >= n_cars:
            # shift / step moved past the last available car
            break
        background_path = background_file_paths[car_index % len(background_file_paths)]
        car_with_background = load_car_onto_background(car_file_paths[car_index], background_path)

        np.save(out_path+f"{file_name}_{i}.npy",car_with_background)


In [None]:
# NOTE(review): the orange car arrays are passed in, but the output file prefix
# is "Black_with_background" — confirm the intended file name.
save_images_with_backgrounds(orange_nparray_file_names,background_file_names,file_name="Black_with_background",max_load=1000)

# UNET Definition

We used a U-Net model similar to the one used by the semantic shapes git repo, mentioned in our references. This provided good performance from the start. The U-Net itself is a typical U-Net architecture, using max pooling to shrink the dimensions in the encoding layers, and using skip connections (concat layers) and Conv2DTranspose layers in the decoding layers. The architecture helps keep locational information and fine-grained details while still being able to learn abstract features.

In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Conv2D, Input, MaxPooling2D, concatenate, Dropout,\
                                    Lambda, Conv2DTranspose, Add
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
import os


# Shape of a single input image, passed to the model's Input layer
imshape = (256, 256, 3)
# Number of output channels of the final layer.
# NOTE(review): `data` is not defined in the visible part of this notebook;
# presumably a collection with one entry per class — confirm.
n_classes = len(data)


def preprocess_input(x):
    """Rescale pixel values from [0, 255] to [-1, 1].

    The result is a new object: the input is not modified, and integer inputs
    are accepted because no in-place operators are used.
    """
    return (x / 255. - 0.5) * 2.


def dice(y_true, y_pred, smooth=1.):
    """Dice coefficient of the flattened tensors, smoothed by *smooth*."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator
def _double_conv(x, filters, dropout_rate):
    """Apply two 3x3 ELU convolutions with a dropout layer in between."""
    x = Conv2D(filters, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (x)
    x = Dropout(dropout_rate) (x)
    return Conv2D(filters, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (x)


def _upsample_and_merge(x, skip, filters):
    """Double the spatial size of *x* with a transposed convolution and concatenate *skip*."""
    x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same') (x)
    return concatenate([x, skip])


def unet(pretrained=False, base=4):
    """Build and compile the U-Net, or load a previously saved one.

    Args:
        pretrained: if True, try to load ``models/<model_name>.model`` first
            and fall back to building a new model when the file is missing.
            NOTE(review): ``model_name`` is not defined in the visible part of
            this notebook — confirm it exists before using pretrained=True.
        base: the first encoder level uses ``2**base`` filters; the filter
            count doubles at every deeper level.

    Returns:
        The loaded model, or a new model compiled with Adam(1e-4), the loss
        matching ``n_classes`` and the ``dice`` metric.

    Raises:
        ValueError: if ``n_classes`` is smaller than 1.
    """
    if pretrained:
        path = os.path.join('models', model_name+'.model')
        if os.path.exists(path):
            model = load_model(path, custom_objects={'dice': dice})
            model.summary()
            return model
        else:
            print('Failed to load existing model at: {}'.format(path))

    if n_classes == 1:
        loss = 'binary_crossentropy'
        final_act = 'sigmoid'
    elif n_classes > 1:
        loss = 'categorical_crossentropy'
        final_act = 'softmax'
    else:
        # without this branch `loss` and `final_act` would be unbound below
        raise ValueError('n_classes must be at least 1, got {}'.format(n_classes))

    i = Input((imshape[0], imshape[1], imshape[2]))
    x = Lambda(lambda t: preprocess_input(t)) (i)

    # Encoder: four levels, each followed by 2x2 max pooling
    skips = []
    for level, dropout_rate in enumerate((0.1, 0.1, 0.2, 0.2)):
        features = _double_conv(x, 2**(base + level), dropout_rate)
        skips.append(features)
        x = MaxPooling2D((2, 2)) (features)

    # Bottleneck
    x = _double_conv(x, 2**(base + 4), 0.3)

    # Decoder: mirror of the encoder, merging the matching encoder features
    for level, dropout_rate in zip((3, 2, 1, 0), (0.2, 0.2, 0.1, 0.1)):
        filters = 2**(base + level)
        x = _upsample_and_merge(x, skips[level], filters)
        x = _double_conv(x, filters, dropout_rate)

    o = Conv2D(n_classes, (1, 1), activation=final_act) (x)

    model = Model(inputs=i, outputs=o)
    model.compile(optimizer=Adam(1e-4),
                  loss=loss,
                  metrics=[dice])
    model.summary()

    return model

# Hyperparameter Optimization

We ran the hyperparameter optimization for different optimizers, batch sizes, learning rates and losses to see which combination gives us the best results. For that sake, we also defined different loss functions below.
[Link to Google colab where it showcases some runs.](https://colab.research.google.com/drive/1Bnk7cO6686d2v3Ph995R3QLQyAkDe2a0?usp=sharing)

In [None]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy

def binary_crossentropy_loss(y_true, y_pred):
    """Binary cross-entropy between the targets and the predictions."""
    loss = binary_crossentropy(y_true, y_pred)
    return loss

def categorical_cross_entropy_loss(y_true, y_pred):
    """Categorical cross-entropy between the targets and the predictions."""
    loss = categorical_crossentropy(y_true, y_pred)
    return loss

def DiceLoss(targets, inputs, smooth=1e-6):
    """One minus the smoothed Dice coefficient of the flattened tensors."""
    flat_pred = K.flatten(inputs)
    flat_true = K.flatten(targets)
    overlap = K.sum(flat_true * flat_pred)
    total = K.sum(flat_true) + K.sum(flat_pred)
    dice_coefficient = (2*overlap + smooth) / (total + smooth)
    return 1 - dice_coefficient

def DiceBCELoss(targets, inputs, smooth=1e-6):
    """Sum of the binary cross-entropy and the Dice loss.

    Args:
        targets: ground-truth tensor.
        inputs: predicted tensor.
        smooth: smoothing term forwarded to ``DiceLoss``.
    """
    # Calculate BCE loss
    bce = binary_crossentropy_loss(targets, inputs)

    # Calculate the dice loss with the requested smoothing term
    dice_loss = DiceLoss(targets, inputs, smooth=smooth)

    # Combine BCE and dice loss
    return bce + dice_loss

In [None]:
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD, RMSprop


# Split the data into training and validation sets
def sorted_fns(dir):
    """List the entries of *dir*, ordered by the 4-digit number that ends their name.

    The number is read from the last four characters of the part of the file
    name that precedes the first dot.
    """
    def numeric_suffix(file_name):
        stem = file_name.split('.')[0]
        return int(stem[-4:])

    names = os.listdir(dir)
    names.sort(key=numeric_suffix)
    return names

all_paths = [os.path.join(directory_path, x) for x in sorted_fns(directory_path)]

# Split the data into training and validation sets
train_paths, val_paths = train_test_split(all_paths, test_size=0.2, random_state=42)

# Define hyperparameter grid
param_grid = {
    'optimizer': [Adam, RMSprop, SGD],  # optimizer classes, instantiated per run
    'learning_rate': [1e-5, 1e-3, 1e-4],
    'batch_size': [8, 16, 32],
    'loss': [DiceLoss, categorical_cross_entropy_loss, DiceBCELoss],
}

# Generate all combinations of hyperparameters
param_combinations = list(ParameterGrid(param_grid))

# Perform grid search
best_params = None
best_loss = float('inf')
best_model_number = None

for model_counter, params in enumerate(param_combinations, start=1):

    print(f"\nTraining model with parameters: {params}")

    # Drop the state of the previous model so memory does not grow with
    # every combination of the grid
    tf.keras.backend.clear_session()

    model = unet(pretrained=False, base=4)

    tg = DataGenerator(paths=train_paths, batch_size=params['batch_size'], augment=True)
    val_generator = DataGenerator(paths=val_paths, batch_size=params['batch_size'], augment=False)

    # Re-compile with the optimizer, learning rate and loss of this combination
    model.compile(optimizer=params['optimizer'](learning_rate=params['learning_rate']),
                  loss=params['loss'],
                  metrics=[dice])

    # Define EarlyStopping callback
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.005, mode="min", patience=5, restore_best_weights=True)

    # Model.fit accepts generators directly; fit_generator is deprecated
    model.fit(tg,
              steps_per_epoch=len(tg),
              epochs=100,  # Adjust the number of epochs based on your needs
              validation_data=val_generator,
              validation_steps=len(val_generator),
              verbose=1,
              callbacks=[early_stopping])

    # Evaluate on the validation set; the first returned value is the loss
    val_loss = model.evaluate(val_generator, steps=len(val_generator), verbose=0)

    if val_loss[0] < best_loss:
        best_loss = val_loss[0]
        best_params = params
        best_model_number = model_counter

# Print the best hyperparameters and the corresponding model number
print(f"Best Hyperparameters for Model {best_model_number}:", best_params)

# Final Training
In the final training we trained our model with the best parameters that we received in the hyperparameter optimization. We set 200 epochs to be sure we gave the model enough iterations to achieve proper results; however, with early stopping, 130 epochs were already enough.
[Link to Google colab where it showcases some runs.](https://colab.research.google.com/drive/1tyVwgMh-6IZ4REJ_7dydBFzHy22eCt_0?usp=sharing)

In [None]:
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD, RMSprop



def sorted_fns(dir):
    """List the entries of *dir*, ordered by the 4-digit number that ends their name.

    The number is read from the last four characters of the part of the file
    name that precedes the first dot.
    """
    def numeric_suffix(file_name):
        stem = file_name.split('.')[0]
        return int(stem[-4:])

    names = os.listdir(dir)
    names.sort(key=numeric_suffix)
    return names



# Full paths of all arrays in directory_path, ordered by their numeric suffix
all_paths = [os.path.join(directory_path, x) for x in sorted_fns(directory_path)]
print(all_paths[1])

# Split the data into training and validation sets.
# No random_state is passed, so the split differs between runs.
train_paths, val_paths = train_test_split(all_paths, test_size=0.2)
print(len(train_paths))
print(len(val_paths))
print(train_paths[0])

# Build a new, untrained model
model = unet(pretrained=False, base=4)

# Training generator.
# NOTE(review): DataGenerator is not defined in the visible part of this notebook.
tg = DataGenerator(paths=train_paths, batch_size=32, augment=False)

# Define EarlyStopping callback: stop when val_loss has not improved by at
# least 0.005 for 30 epochs. restore_best_weights is not enabled here.
early_stopping = EarlyStopping(monitor='val_loss', mode="min", verbose=1, patience=30, min_delta=0.005)

# Define validation generator
val_generator = DataGenerator(paths=val_paths, batch_size=32, augment=False)

In [None]:
# Train for at most 200 epochs, validating on val_generator after every epoch.
# NOTE(review): model_checkpoint_callback is not defined in the visible part of
# this notebook — confirm it is created before this cell runs.
model.fit(tg,
          steps_per_epoch=len(tg),
          epochs=200,
          verbose=1,
          validation_data=val_generator,
          # validation_steps is left at its default
          callbacks=[model_checkpoint_callback, early_stopping]
          )

# Saving out the model
After the training we saved out the model to use it later on for the results

In [None]:
# Folder prefix the trained model is written to (must end with a separator)
savemodelto = 'drive/My Drive/carseg_data/'
# Save the whole model as a single .keras file
model.save(savemodelto+'model.keras')

In [None]:
!pip install h5py

In [None]:
# model_from_json is imported but not used in this cell
from keras.models import model_from_json

# serialize the model architecture to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

# save the weights separately in HDF5 format
model.save_weights("model.h5")

In [None]:
print("Model saved.")

# Results with dice values
After we trained the model we represented the achieved results using the three different dice values. Also to get a feeling regarding the overall performance with all the test images, we took the averages of the dice values.
[Link to Google colab where it showcases some runs.](https://colab.research.google.com/drive/1tyVwgMh-6IZ4REJ_7dydBFzHy22eCt_0?usp=sharing)

In [None]:
# Colour used to display each predicted class index (consumed by classToRGB)
class_to_rgb = {
    1: [255,128,0],
    2: [0,255,0],
    3: [255,255,0],
    4: [0,255,255],
    5: [128,0,255],
    6: [0,255,128],
    7: [0,128,255],
    8: [255,0,128],
    9: [255,255,255],
    0: [0,0,0],
}

In [None]:
def classToRGB(_paddedPrediction,_class_to_rgb):
    """Replace, in place, the class index of every pixel with its RGB colour.

    Args:
        _paddedPrediction: array of shape (height, width, 3) whose channel 0
            holds the class index of each pixel; it is overwritten with the
            colours.
        _class_to_rgb: mapping from class index to a 3-element colour.

    Raises:
        KeyError: if a class index found in the array is missing from the mapping.
    """
    # Copy the class indices first: channel 0 is overwritten while colouring
    class_indices = _paddedPrediction[..., 0].copy()
    for class_index in np.unique(class_indices):
        _paddedPrediction[class_indices == class_index] = _class_to_rgb[class_index]

In [None]:
# Folder with the test arrays
directory_path = 'drive/My Drive/carseg_data/test/'
# Select the arrays of the real photos
regex_pattern = r'photo_[0-9]*\.npy'
# NOTE(review): despite the variable name, this list holds the photo_* arrays
# (paths already include directory_path) — confirm the intended name.
black_car_image_file_names = sorted(filter_files_by_regex(directory_path, regex_pattern))

In [None]:
# NOTE(review): load_numpy_arrays is not defined in the visible part of this
# notebook; presumably it returns the images and their masks — confirm. The
# file names passed here already contain directory_path.
numpy_arraysx, numpy_arraysy = load_numpy_arrays(directory_path, black_car_image_file_names, max_load=30)

In [None]:
# Load the weights stored at checkpoint_filepath into the model.
# NOTE(review): checkpoint_filepath is not defined in the visible part of this
# notebook — confirm it points to the checkpoint written during training.
model.load_weights(checkpoint_filepath)

In [None]:
def _share_correct(correct, region):
    """Share of the pixels inside *region* that are correct (NaN for an empty region)."""
    region_size = region.sum()
    if region_size == 0:
        return float('nan')
    return np.logical_and(correct, region).sum() / region_size


# Per-image scores; NaN marks an image in which the evaluated region is empty
full_scores = []
car_scores = []
parts_scores = []

# Predict all images loaded in
for i in range(len(numpy_arraysx)):
    # NOTE(review): the ground truth is picked by position (photo_0001, ...),
    # not by the name of the loaded test file — confirm the two line up.
    y_true = np.load(f'drive/My Drive/carseg_data/new_arrays/photo_{str(i+1).zfill(4)}.npy')
    # labels are stored as 0, 10, ..., 90; convert them to class indices 0..9
    y_true = (y_true[...,-1] / 10).astype(int)

    # Image to predict
    image = numpy_arraysx[i].reshape(1, 256, 256, 3)
    prediction = np.argmax(model.predict(image, verbose=0).squeeze(), axis=2)

    correct = prediction == y_true
    car = y_true != 0                            # everything except background
    car_part = np.logical_and(y_true != 9, car)  # car pixels that are not class 9

    # Each score is the share of correctly classified pixels in its region
    full_dice = correct.mean()
    car_dice = _share_correct(correct, car)
    parts_dice = _share_correct(correct, car_part)

    full_scores.append(full_dice)
    car_scores.append(car_dice)
    parts_scores.append(parts_dice)

    print(f"Image {i+1}:")
    print("Full dice:", full_dice)
    print("Car dice:", car_dice)
    print("Parts dice:", parts_dice)

    # Add two channels so every class index can be replaced by an RGB colour
    paddedPrediction = np.pad(prediction[...,np.newaxis], ((0,0),(0,0),(0,2)), mode='constant', constant_values=1)
    classToRGB(paddedPrediction, class_to_rgb)
    # Input, expected output and prediction:
    show_image(numpy_arraysx[i])
    show_image(numpy_arraysy[i])
    show_image(paddedPrediction)

# Calculate averages, ignoring the images whose region was empty
avg_full_dice = np.nanmean(full_scores)
avg_car_dice = np.nanmean(car_scores)
avg_parts_dice = np.nanmean(parts_scores)

# Print averages
print("\nAverage Dice Values:")
print("Average Full Dice:", avg_full_dice)
print("Average Car Dice:", avg_car_dice)
print("Average Parts Dice:", avg_parts_dice)