In [None]:
import numpy as np
from numpy.linalg import norm
import os
import matplotlib.pyplot as plt
import cv2
import re
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the landmark, archive and weight paths used below live under this mount point
drive.mount('/content/drive')

Mounted at /content/drive


The file paths below will most likely need to be changed before this project can be run.

In [None]:
# Paths to the three landmark annotation text files stored on the mounted Drive
landmarks1 = "/content/drive/MyDrive/Final_Year_Project/final_year_project/Images/Landmark/landmark_list_part1.txt"
landmarks2 = "/content/drive/MyDrive/Final_Year_Project/final_year_project/Images/Landmark/landmark_list_part2.txt"
landmarks3 = "/content/drive/MyDrive/Final_Year_Project/final_year_project/Images/Landmark/landmark_list_part3.txt"
# Unpack the face image archive from Drive into /content (shell command, runs outside Python)
!unzip "/content/drive/MyDrive/Final_Year_Project/final_year_project/Images/UTKface_Alignedcropped.zip" -d "/content"

In [None]:
cropped_image_labels = []
image_var = []
# Expected filename format: <age>_<gender 0|1>_<ethnicity 0-4>_<digits>.<extension>
# A raw string is used so that \d and \. reach the regex engine untouched; in a normal
# string they are invalid escape sequences and raise a warning on current Python versions.
cropped_file_re = re.compile(r'\d{1,3}_[01]_[0-4]_\d+\..+[jpg]+')
cropped_image_dir = "/content/UTKface_Aligned&cropped/UTKFace"

# Loop to extract labels from the images.
# os.listdir returns names in arbitrary order, so they are sorted to make every later
# order-dependent step (pruning every second image, seeded splits) repeatable.
for filename in sorted(os.listdir(cropped_image_dir)):
  if cropped_file_re.match(filename) is None: # Non-match will return None
    continue

  image = cv2.imread(os.path.join(cropped_image_dir, filename))
  if image is None:
    # cv2.imread returns None instead of raising when a file cannot be decoded;
    # skipping it keeps the label list and the image list aligned and free of None entries
    print('Unreadable image skipped: ', filename)
    continue

  # Split filename by the '_' character to get the age, gender and ethnicity labels
  age, gender, ethnicity = (int(value) for value in filename.split('_', 3)[:3])
  cropped_image_labels.append([filename, age, gender, ethnicity])
  image_var.append(image)

# Pruning Function

These functions remove specified entries from the image set so that datasets with a chosen bias can be created.

In [None]:
def removeValuesControl(lableList, images, ageMin, ethnicityBias):
  """
  Build the control dataset: drop entries at or below an age threshold, drop one ethnicity
  label entirely, then keep only every second remaining entry that has ethnicity label 0
  :param lableList: list of lists holding [filename, age, gender, ethnicity] for each image
  :param images: list of images, index-aligned with lableList
  :param ageMin: int threshold; only entries whose age is strictly greater than this are kept
  :param ethnicityBias: int ethnicity label that is excluded from the dataset
  :return: tuple (labels, images); the retained ethnicity-0 entries come first, followed by
           every other retained entry, each group in its original relative order
  """
  # Keep each surviving label together with its image so the two never drift apart
  retained = [(entry, images[position])
              for position, entry in enumerate(lableList)
              if entry[1] > ageMin and entry[3] != ethnicityBias]

  # Separate ethnicity label 0 from everything else
  labelZeroPairs = [pair for pair in retained if pair[0][3] == 0]
  otherPairs = [pair for pair in retained if not pair[0][3] == 0]

  # Positions 0, 2, 4, ... of the ethnicity-0 group are kept, halving that group
  orderedPairs = labelZeroPairs[::2] + otherPairs

  prunedLableList = [pair[0] for pair in orderedPairs]
  prunedImages = [pair[1] for pair in orderedPairs]
  return prunedLableList, prunedImages

def removeValuesBias(labelList, images, ageMin, ethnicity):
  """
  Keep only the entries whose age is strictly greater than ageMin and whose ethnicity label
  equals the requested one
  :param labelList: list of lists holding [filename, age, gender, ethnicity] for each image
  :param images: list of images, index-aligned with labelList
  :param ageMin: int threshold; only entries whose age is strictly greater than this are kept
  :param ethnicity: int ethnicity label that is kept in the dataset
  :return: tuple (labels, images) in their original relative order
  """
  # Positions of the entries that satisfy both the age and the ethnicity condition
  keptPositions = [position
                   for position, entry in enumerate(labelList)
                   if entry[1] > ageMin and entry[3] == ethnicity]

  prunedLabelList = [labelList[position] for position in keptPositions]
  prunedImages = [images[position] for position in keptPositions]
  return prunedLabelList, prunedImages

In [None]:
# Control set: ages above 17, ethnicity label 4 excluded
control_labels, control_images = removeValuesControl(cropped_image_labels, image_var, 17, 4)
# One single-ethnicity set per ethnicity label (0 to 4), each restricted to ages above 17
white_labels, white_images = removeValuesBias(cropped_image_labels, image_var, 17, 0)
black_labels, black_images = removeValuesBias(cropped_image_labels, image_var, 17, 1)
asian_labels, asian_images = removeValuesBias(cropped_image_labels, image_var, 17, 2)
indian_labels, indian_images = removeValuesBias(cropped_image_labels, image_var, 17, 3)
other_labels, other_images = removeValuesBias(cropped_image_labels, image_var, 17, 4)

In [None]:
def concatLists(list1, list2, list3):
  """
  Join three sequences into one new list, preserving order
  :param list1: first sequence; its items come first in the result
  :param list2: second sequence; its items follow those of list1
  :param list3: third sequence; its items come last
  :return: a new list holding every item of list1, list2 and list3
  """
  return [*list1, *list2, *list3]

In [None]:
# Read the three landmark files; the with-statement closes every handle once the lines are read
with open(landmarks1, 'r') as open_landmark1, \
     open(landmarks2, 'r') as open_landmark2, \
     open(landmarks3, 'r') as open_landmark3:
  landmark1_str = open_landmark1.readlines()
  landmark2_str = open_landmark2.readlines()
  landmark3_str = open_landmark3.readlines()

landmark_list = concatLists(landmark1_str, landmark2_str, landmark3_str) # Concatenate all three .txt files containing the landmark info

NUM_LANDMARK_VALUES = 136 # 68 landmarks, each an (x, y) pair

landmark_info = []
# Iterate through each line of the landmark info .txt files
for string in landmark_list:
  # Whitespace split drops the trailing newline and tolerates repeated spaces
  clean_split_str = string.split()
  if not clean_split_str:
    continue # Blank line, nothing to parse

  # One line has a space between the filename and the file extension, making the list 1 longer
  if len(clean_split_str) == 138:
    true_image_name = clean_split_str[0] + clean_split_str[1] # Combine filename and file extension
    coord_start = 2
  else:
    true_image_name = clean_split_str[0]
    coord_start = 1

  # Take all 136 coordinate values. The previous slices ([2:137] and [1:136]) stopped one
  # element early, so the y value of the last landmark was always left at 0.
  coords = clean_split_str[coord_start:coord_start + NUM_LANDMARK_VALUES]
  temp_landmarks = np.zeros((NUM_LANDMARK_VALUES,), dtype = int)
  # A line with fewer values than expected keeps zeros in the remaining positions
  temp_landmarks[:len(coords)] = [int(value) for value in coords]

  reshaped_temp_landmarks = temp_landmarks.reshape((68, 2)) # One row per landmark: (x, y)
  landmark_info.append([true_image_name, reshaped_temp_landmarks])

In [None]:
def imagePreprocessing(images):
  """
  Normalise and greyscale a collection of BGR images
  :param images: list (or Ndarray) of Ndarrays containing BGR images with 0-255 pixel values
  :return: float32 Ndarray holding one greyscale image per input image, pixel values scaled to 0-1
  """
  greyScaleImages = []
  for image in images:
    # Normalise one image at a time. Converting the whole set to a float32 colour array first
    # produces the same values but needs a full-size float copy of every colour image at once.
    image32 = np.asarray(image).astype("float32") / 255
    greyScaleImages.append(cv2.cvtColor(image32, cv2.COLOR_BGR2GRAY)) # Greyscale the image

  # Change list of Ndarrays containing images to a single Ndarray
  return np.asarray(greyScaleImages, dtype="float32")

In [None]:
# Preprocess every dataset with imagePreprocessing (normalise and greyscale) before it is used with the model
grey_control_images = imagePreprocessing(control_images)
grey_white_images = imagePreprocessing(white_images)
grey_black_images = imagePreprocessing(black_images)
grey_asian_images = imagePreprocessing(asian_images)
grey_indian_images = imagePreprocessing(indian_images)
grey_other_images = imagePreprocessing(other_images)

In [None]:
# Split each dataset into train and test portions. Labels and images go through the same
# call so they are split together, and the fixed random_state makes the split repeatable.
# The control set uses train_size=0.66; every single-ethnicity set uses train_size=0.7.
(train_control_labels, test_control_labels,
 train_control_images, test_control_images) = train_test_split(
    control_labels, grey_control_images, train_size=0.66, random_state=2)

(train_white_labels, test_white_labels,
 train_white_images, test_white_images) = train_test_split(
    white_labels, grey_white_images, train_size=0.7, random_state=2)

(train_black_labels, test_black_labels,
 train_black_images, test_black_images) = train_test_split(
    black_labels, grey_black_images, train_size=0.7, random_state=2)

(train_asian_labels, test_asian_labels,
 train_asian_images, test_asian_images) = train_test_split(
    asian_labels, grey_asian_images, train_size=0.7, random_state=2)

(train_indian_labels, test_indian_labels,
 train_indian_images, test_indian_images) = train_test_split(
    indian_labels, grey_indian_images, train_size=0.7, random_state=2)

In [None]:
def trainingLandmarks(imageLabels, landmarkInfo, images):
  """
  Obtain the landmark coordinates of the images and order them in the same order as the images.
  Every image without landmark coordinates is removed from the returned image array, so the two
  returned arrays always have the same length and stay index-aligned. Exactly one set of
  coordinates is used per image, even if several landmark filenames would match it.
  :param imageLabels: list of lists containing the filename and labels for the images
  :param landmarkInfo: list of [filename, Ndarray] pairs containing filenames and landmark coordinates
  :param images: Ndarray of Ndarrays containing images, index-aligned with imageLabels
  :return: tuple (landmarks, images): int Ndarray of landmark coordinates and the images that have them
  """
  # Index the landmark coordinates by filename so the usual case is a dictionary lookup
  # instead of a scan over every landmark entry for every image
  landmarksByName = {}
  for entry in landmarkInfo:
    landmarksByName.setdefault(entry[0], entry[1]) # The first occurrence of a filename wins

  def findLandmarks(imageName):
    # Fast path: try the image filename itself, then each shorter dot-delimited prefix of it
    # (for example "a.jpg.chip.jpg" -> "a.jpg.chip" -> "a.jpg" -> "a")
    candidate = imageName
    while candidate:
      if candidate in landmarksByName:
        return landmarksByName[candidate]
      if '.' not in candidate:
        break
      candidate = candidate.rsplit('.', 1)[0]

    # Fallback: the general rule that the landmark filename is contained in the image filename
    for entry in landmarkInfo:
      if entry[0] in imageName:
        return entry[1]

    return None

  trainLandmarkInfo = []
  missingIndexes = []
  for position, label in enumerate(imageLabels):
    coordinates = findLandmarks(label[0])
    if coordinates is None:
      # Remember every image without landmarks (not just the first one) so all can be removed
      missingIndexes.append(position)
      print('Index value: ', position)
      continue

    # Report any value that is of an unexpected type or shape
    if (type(coordinates) != np.ndarray) or (coordinates.shape != (68, 2)):
      print('Index type: ', type(coordinates), '\nIndex shape: ', getattr(coordinates, 'shape', None))

    trainLandmarkInfo.append(coordinates)

  if missingIndexes:
    # Removal of the images that are useless without their landmark coordinates
    images = np.delete(images, missingIndexes, 0)

  trainLandmarkInfo = np.asarray(trainLandmarkInfo, dtype=int)

  return trainLandmarkInfo, images

In [None]:
# Attach landmark coordinates to every train and test split.
# trainingLandmarks may remove images that have no landmark data, so each image array is
# reassigned from its return value. The *_labels lists are not updated by these calls.
train_control_landmarks, train_control_images = trainingLandmarks(train_control_labels, landmark_info, train_control_images)
test_control_landmarks, test_control_images = trainingLandmarks(test_control_labels, landmark_info, test_control_images)

train_white_landmarks, train_white_images = trainingLandmarks(train_white_labels, landmark_info, train_white_images)
test_white_landmarks, test_white_images = trainingLandmarks(test_white_labels, landmark_info, test_white_images)

train_black_landmarks, train_black_images = trainingLandmarks(train_black_labels, landmark_info, train_black_images)
test_black_landmarks, test_black_images = trainingLandmarks(test_black_labels, landmark_info, test_black_images)

train_asian_landmarks, train_asian_images = trainingLandmarks(train_asian_labels, landmark_info, train_asian_images)
test_asian_landmarks, test_asian_images = trainingLandmarks(test_asian_labels, landmark_info, test_asian_images)

train_indian_landmarks, train_indian_images = trainingLandmarks(train_indian_labels, landmark_info, train_indian_images)
test_indian_landmarks, test_indian_images = trainingLandmarks(test_indian_labels, landmark_info, test_indian_images)

# Pathways for model weights
To load the weights for a specific bias, change the file path passed to `model.load_weights()` to one of the paths below. The paths below are abbreviated (the leading `...` stands for the part that depends on where you have stored the weights), so prepend the location of your own copy of the `CNN_Weights` folder.

## Control
.../CNN_Weights/Control_bias_weights/control_checkpoint

## White bias
.../CNN_Weights/White_bias_weights/white_checkpoint_2

## Black bias
.../CNN_Weights/Black_bias_weights/black_checkpoint_2

## Asian bias
.../CNN_Weights/Asian_bias_weights/asian_checkpoint_2

## Indian bias
.../CNN_Weights/Indian_bias_weights/indian_checkpoint




In [None]:
# Define the CNN model. Tensors are channels-last (height, width, channels), matching the
# (200, 200, 1) input shape, so the shape comments below are written in that order.
model = keras.Sequential([
    layers.Conv2D(32, (3,3), padding='same', strides=2, activation='selu', bias_initializer=keras.initializers.VarianceScaling, input_shape=(200, 200, 1)), # Output = 100, 100, 32
    layers.MaxPooling2D((2,2)), # Output = 50, 50, 32
    layers.Conv2D(64, (3,3), bias_initializer=keras.initializers.VarianceScaling, activation='selu'), # Output = 48, 48, 64
    layers.MaxPooling2D((2,2)), # Output = 24, 24, 64
    layers.Conv2D(128, (3,3), bias_initializer=keras.initializers.VarianceScaling, activation='selu'), # Output = 22, 22, 128
    layers.MaxPooling2D((2,2)), # Output = 11, 11, 128
    layers.Flatten(), # Output = 15488 (11 * 11 * 128)
    layers.Dense(3872, bias_initializer=keras.initializers.VarianceScaling, activation='selu'),
    layers.Dense(1290, bias_initializer=keras.initializers.VarianceScaling, activation='selu'),
    layers.Dense(645, bias_initializer=keras.initializers.VarianceScaling, activation='selu'),
    layers.Dense(322, bias_initializer=keras.initializers.VarianceScaling, activation='selu'),
    layers.Dense(68*2) # Output layer of size (136) so that can be reshaped to (68, 2)
])

# Compile the model.
# The loss identifier must be spelled 'mean_squared_error'; the previous spelling
# 'mean_squarred_error' is not a name Keras can resolve to a loss function.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001,
                                              weight_decay=0.00001),
              loss='mean_squared_error',
              metrics=['accuracy'])
# Load previously saved weights for the chosen bias
model.load_weights("/content/drive/MyDrive/Final_Year_Project/final_year_project/CNN_Weights/Control_bias_weights/control_checkpoint")

In [None]:
# Checkpoint callback: writes the model weights to checkpoint_dir whenever the monitored
# training loss reaches a new minimum (save_best_only=True, mode='min')
checkpoint_dir = "/content/drive/MyDrive/Final_Year_Project/final_year_project/CNN_Weights/Control_bias_weights/control_checkpoint_2"
model_weight_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_dir,
    monitor='loss',
    save_weights_only=True,
    save_best_only=True,
    mode='min',
    verbose=1,
)

# Flatten each (68, 2) landmark array to 136 values so it matches the model's output layer
train_control_targets = np.reshape(train_control_landmarks,
                                   (train_control_landmarks.shape[0], 136))

# Train the model, holding back 20% of the training data for validation
history = model.fit(
    train_control_images,
    train_control_targets,
    validation_split=0.2,
    epochs=30,
    batch_size=64,
    callbacks=[model_weight_checkpoint],
)

In [None]:
def landmarkSimilarity(yHat, y):
  """
  For every predicted landmark vector, find the reference landmark set with the greatest
  cosine similarity
  :param yHat: Ndarray of Ndarrays containing the predicted (flattened) landmark coordinates of given images
  :param y: list of [filename, Ndarray] pairs containing filenames and landmark coordinates of images
  :return: list of int index values into y, one per prediction (first index wins on ties)
  :raises ValueError: if predictions are given but y is empty
  """
  predictions = np.asarray(yHat).astype(float)
  if len(predictions) == 0:
    return []
  if len(y) == 0:
    raise ValueError("y must contain at least one set of landmark coordinates")

  # Flatten every reference and compute its norm once, instead of once per prediction
  references = np.stack([np.asarray(entry[1]).astype(float).flatten() for entry in y])
  referenceNorms = norm(references, axis=1)

  similarityIndexes = []
  for prediction in predictions:
    # A zero-length vector on either side makes the cosine undefined; silence the warning
    # and rank those entries last so they are never preferred over a real similarity value
    with np.errstate(divide='ignore', invalid='ignore'):
      similarities = references.dot(prediction) / (referenceNorms * norm(prediction))
    similarities = np.where(np.isnan(similarities), -np.inf, similarities)

    # Index of the landmark coordinates with the greatest cosine similarity
    similarityIndexes.append(int(np.argmax(similarities)))

  return similarityIndexes

def predictionSample(images, numImages):
  """
  Draw a random sample of images; each pick is independent, so an image can appear more than once
  :param images: Ndarray of Ndarrays containing images
  :param numImages: int value determining the amount of images to get
  :return: Ndarray holding the numImages randomly picked images
  """
  # One random index per requested image, drawn from 0 to len(images) - 1
  chosenIndexes = [np.random.randint(0, len(images)) for _ in range(numImages)]

  # Collect the picked images into a single Ndarray
  return np.asarray([images[position] for position in chosenIndexes])

def similarImages(imageIndex, labelList, images, landmarkInfo):
  """
  Look up the images that belong to the landmark entries selected by landmarkSimilarity and
  convert them from BGR to RGB
  :param imageIndex: list of int values representing index values of the landmarkInfo parameter
  :param labelList: list of lists storing the filename and labels of the images
  :param images: list of Ndarrays storing images, index-aligned with labelList
  :param landmarkInfo: list of [filename, Ndarray] pairs storing filenames and landmark coordinates
  :return: Ndarray of RGB images; one image is included for every label filename that contains
           the selected landmark filename
  """
  # For each selected landmark entry, gather every image whose filename contains the landmark filename
  matchedImages = [images[position]
                   for index in imageIndex
                   for position, label in enumerate(labelList)
                   if landmarkInfo[index][0] in label[0]]

  # Convert the images to a RGB format so that they can be displayed properly
  return np.asarray([cv2.cvtColor(matched, cv2.COLOR_BGR2RGB) for matched in matchedImages])

In [None]:
def _matchSimilarFaces(groupImages, numImages=5):
  """
  Run the prediction pipeline for one group of images
  :param groupImages: Ndarray of preprocessed images to sample from
  :param numImages: int number of random images to sample from the group
  :return: tuple (sample, predictions, indexes, similar): the sampled images, the predicted
           landmarks cast to int32, the index into landmark_info of the closest landmark set for
           each prediction, and the RGB images looked up for those indexes
  """
  sample = predictionSample(groupImages, numImages)
  predictions = model.predict(sample, batch_size=16)
  predictions = tf.cast(predictions, tf.int32) # Predicted coordinates are compared as whole numbers
  indexes = landmarkSimilarity(predictions, landmark_info)
  similar = similarImages(indexes, cropped_image_labels, image_var, landmark_info)
  return sample, predictions, indexes, similar

# Test on 5 random samples from each of the 5 ethnicity groups (25 samples in total).
# The first four groups are sampled from their test splits; the "other" group is sampled from all of its images.
white_sample, white_predictions, white_prediction_indexes, white_similar_images = _matchSimilarFaces(test_white_images)

black_sample, black_predictions, black_prediction_indexes, black_similar_images = _matchSimilarFaces(test_black_images)

asian_sample, asian_predictions, asian_prediction_indexes, asian_similar_images = _matchSimilarFaces(test_asian_images)

indian_sample, indian_predictions, indian_prediction_indexes, indian_similar_images = _matchSimilarFaces(test_indian_images)

other_sample, other_predictions, other_prediction_indexes, other_similar_images = _matchSimilarFaces(grey_other_images)

In [None]:
# One figure per sampled white-group image: the sampled greyscale image on the left, its matched image on the right
for idx, sample_image in enumerate(white_sample):
  fig, axes = plt.subplots(1,2)
  sample_axis, match_axis = axes
  sample_axis.imshow(sample_image, cmap='gray')
  match_axis.imshow(white_similar_images[idx])

In [None]:
# One figure per sampled black-group image: the sampled greyscale image on the left, its matched image on the right
for idx, sample_image in enumerate(black_sample):
  fig, axes = plt.subplots(1,2)
  sample_axis, match_axis = axes
  sample_axis.imshow(sample_image, cmap='gray')
  match_axis.imshow(black_similar_images[idx])

In [None]:
# One figure per sampled asian-group image: the sampled greyscale image on the left, its matched image on the right
for idx, sample_image in enumerate(asian_sample):
  fig, axes = plt.subplots(1,2)
  sample_axis, match_axis = axes
  sample_axis.imshow(sample_image, cmap='gray')
  match_axis.imshow(asian_similar_images[idx])

In [None]:
# One figure per sampled indian-group image: the sampled greyscale image on the left, its matched image on the right
for idx, sample_image in enumerate(indian_sample):
  fig, axes = plt.subplots(1,2)
  sample_axis, match_axis = axes
  sample_axis.imshow(sample_image, cmap='gray')
  match_axis.imshow(indian_similar_images[idx])

In [None]:
# One figure per sampled other-group image: the sampled greyscale image on the left, its matched image on the right
for idx, sample_image in enumerate(other_sample):
  fig, axes = plt.subplots(1,2)
  sample_axis, match_axis = axes
  sample_axis.imshow(sample_image, cmap='gray')
  match_axis.imshow(other_similar_images[idx])