# Code Snippet 1
Description: Code used to move a specified number of randomly selected images from one directory to another.
 
Note: This was used to select the 385 images from the LAION-400M dataset.

In [60]:
import os
import shutil
import random

def move_random_images(source_dir, destination_dir, num_images_to_move):
    """Move a random selection of images from one directory to another.

    Only entries of ``source_dir`` whose names end in ``.jpg`` or ``.png``
    are considered. When fewer images are available than requested, all of
    them are moved. The destination directory is created if it does not
    exist yet, and the name of every moved file is printed.

    Args:
        source_dir: Directory the images are picked from.
        destination_dir: Directory the selected images are moved into.
        num_images_to_move: Maximum number of images to move; values below
            zero are treated as zero.
    """
    # Get a list of all image files in the source directory
    all_images = [f for f in os.listdir(source_dir) if f.endswith(('.jpg', '.png'))]

    # Randomly select the specified number of images, clamped to the range
    # random.sample accepts (0 .. number of available images)
    sample_size = max(0, min(num_images_to_move, len(all_images)))
    selected_images = random.sample(all_images, sample_size)

    # shutil.move cannot place a file inside a directory that does not exist
    os.makedirs(destination_dir, exist_ok=True)

    # Move the selected images to the destination directory
    for image in selected_images:
        source_path = os.path.join(source_dir, image)
        destination_path = os.path.join(destination_dir, image)
        shutil.move(source_path, destination_path)
        print(f'Moved: {image}')

# Replace these paths with your actual source and destination directories.
# Every backslash is doubled so that none of them starts a string escape sequence.
source_directory = 'C:\\Users\\User\\FYP\\DownloadedImages\\LAION-400M\\doctorImagesFiltered'
destination_directory = 'C:\\Users\\User\\FYP\\DownloadedImages\\LAION-400M\\doctorImageSubsetsForProcessing\\tmp'

# Specify the number of images you want to move
num_images_to_move = 385

# Call the function to move random images
move_random_images(source_directory, destination_directory, num_images_to_move)


Moved: 65.jpg
Moved: 2.jpg
Moved: 45.jpg
Moved: 12.jpg
Moved: 123.jpg
Moved: 170.jpg
Moved: 115.jpg
Moved: 179.jpg
Moved: 178.jpg
Moved: 132.jpg
Moved: 81.jpg
Moved: 192.jpg
Moved: 172.jpg
Moved: 84.jpg
Moved: 117.jpg
Moved: 129.jpg
Moved: 79.jpg
Moved: 18.jpg
Moved: 1.jpg
Moved: 21.jpg
Moved: 96.jpg
Moved: 32.jpg
Moved: 38.jpg
Moved: 93.jpg
Moved: 55.jpg
Moved: 35.jpg
Moved: 92.jpg
Moved: 155.jpg
Moved: 27.jpg
Moved: 191.jpg
Moved: 83.jpg
Moved: 133.jpg
Moved: 144.jpg
Moved: 88.jpg
Moved: 143.jpg
Moved: 39.jpg
Moved: 140.jpg
Moved: 151.jpg
Moved: 64.jpg
Moved: 118.jpg
Moved: 128.jpg
Moved: 15.jpg
Moved: 168.jpg
Moved: 94.jpg
Moved: 157.jpg
Moved: 0.jpg
Moved: 40.jpg
Moved: 97.jpg
Moved: 106.jpg
Moved: 48.jpg
Moved: 61.jpg
Moved: 112.jpg
Moved: 153.jpg
Moved: 87.jpg
Moved: 75.jpg
Moved: 67.jpg
Moved: 119.jpg
Moved: 114.jpg
Moved: 8.jpg
Moved: 108.jpg
Moved: 14.jpg
Moved: 131.jpg
Moved: 185.jpg
Moved: 53.jpg
Moved: 186.jpg
Moved: 134.jpg
Moved: 110.jpg
Moved: 166.jpg
Moved: 24.jpg
Moved

# Code Snippet 2
Description: Code used to determine which images contain exactly a specified number of people and copy those images to another directory.

Note: This was used to select the 97 images used for the Google Form.

In [27]:
import os
import dlib
import cv2
from ultralytics import YOLO
import torch

# Directory holding the images to filter. Alternative inputs used for other runs:
# source_dir = 'C:\\Users\\User\\FYP\\MidJourney\\Nurse'
# source_dir = "C:\\Users\\User\\FYP\\StableDiffusion\\StableDiffusion\\stable-diffusion-webui\\outputs\\txt2img-images\\Doctor&Nurse(DPM++ 2M Karras)"
source_dir = "C:\\Users\\User\\FYP\\StableDiffusion\\StableDiffusion\\stable-diffusion-webui\\outputs\\txt2img-images\\2024-03-16"

# Directory the matching images are written to. Alternative output used for other runs:
# save_dir = 'C:\\Users\\User\\FYP\\MidJourney\\Nurse1Person'
save_dir = "C:\\Users\\User\\FYP\\StableDiffusion\\StableDiffusion\\stable-diffusion-webui\\outputs\\txt2img-images\\result"

all_images = [f for f in os.listdir(source_dir) if f.endswith(('.jpg', '.png'))]

# Loading the YOLO model. This is downloaded automatically when run for the first time.
model = YOLO('yolov8n.pt')

person_images = []
confidence_threshold = 0.5
number_of_people = 2
# num_images = 97

for image_name in all_images:
    input_image_path = os.path.join(source_dir, image_name)
    image = dlib.load_rgb_image(input_image_path)
    # Making predictions; classes=0 limits the detections to class id 0,
    # which this model reports as "persons" in its log output.
    # NOTE(review): the array loaded by dlib is RGB, while Ultralytics appears to
    # document numpy input as BGR - confirm whether the channels should be swapped.
    predictions = model.predict(image, classes=0)

    # Count only the detections whose confidence exceeds the threshold
    scores = predictions[0].boxes.conf
    filtered_indices = torch.where(scores > confidence_threshold)[0]

    if len(filtered_indices) == number_of_people:
        person_images.append(image)

# Every matching image is kept; random sub-sampling is currently disabled:
# selected_images = random.sample(person_images, min(num_images, len(person_images)))
selected_images = person_images

# cv2.imwrite does not create missing directories, so make sure the target exists
os.makedirs(save_dir, exist_ok=True)

for imgIndex, image in enumerate(selected_images):
    save_path = os.path.join(save_dir, str(imgIndex) + ".jpg")
    # The arrays are RGB (dlib) whereas cv2.imwrite expects BGR, hence the channel swap
    if not cv2.imwrite(save_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR)):
        print(f'Failed to write: {save_path}')


0: 640x640 2 persons, 267.1ms
Speed: 6.0ms preprocess, 267.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 246.1ms
Speed: 5.0ms preprocess, 246.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 252.1ms
Speed: 5.0ms preprocess, 252.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 237.1ms
Speed: 5.0ms preprocess, 237.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 257.1ms
Speed: 6.0ms preprocess, 257.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 persons, 253.1ms
Speed: 6.0ms preprocess, 253.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 persons, 263.1ms
Speed: 6.0ms preprocess, 263.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 256.1ms
Speed: 5.0ms preprocess, 256.1ms inference, 3.0ms postprocess per 

In [10]:
import os
import dlib
import cv2
from deepface import DeepFace
import pandas as pd

source_dir = 'C:\\Users\\User\\FYP\\Dall-E\\GeneratedImages\\NurseFinalisedFiltering'
data_csv_path = 'C:\\Users\\User\\FYP\\Dall-E\\GeneratedImages\\NurseFinalisedFiltering\\data.csv'
# Destination of the renumbered images and of the filtered copy of data.csv
save_dir = 'C:\\Users\\User\\FYP\\Dall-E\\GeneratedImages\\NurseFinalisedFilteringOrdered'
saved_data_csv_path = 'C:\\Users\\User\\FYP\\Dall-E\\GeneratedImages\\NurseFinalisedFilteringOrdered\\data.csv'

# Load data.csv
data_csv = pd.read_csv(data_csv_path)
# Convert DataFrame to a list of row dictionaries
data_csv = data_csv.to_dict(orient='records')

# Index the rows by their 'image_no' once instead of scanning the list for every image.
# setdefault keeps the first row for a repeated number, like a first-match search would.
rows_by_image_no = {}
for row in data_csv:
    rows_by_image_no.setdefault(row.get('image_no'), row)

all_images = [f for f in os.listdir(source_dir) if f.endswith(('.jpg', '.png'))]

saved_images_data = []

# cv2.imwrite does not create missing directories, so make sure the target exists
os.makedirs(save_dir, exist_ok=True)

imgIndex = 0
for image_name in all_images:
    input_image_path = os.path.join(source_dir, image_name)
    image = dlib.load_rgb_image(input_image_path)

    # Using the mtcnn detector to detect faces in the image
    # NOTE(review): the array loaded by dlib is RGB; confirm that DeepFace does not
    # expect BGR numpy input here.
    objs = DeepFace.analyze(image, enforce_detection=False, detector_backend="mtcnn", silent=True)

    # Only images with exactly two analysed faces are kept
    if len(objs) != 2:
        continue

    # The part of the file name before the first dot is matched against 'image_no'
    try:
        image_row = rows_by_image_no.get(int(image_name.split('.')[0]))
    except ValueError:
        image_row = None

    # Look the metadata up before writing anything so that the saved images and
    # the rows of the new data.csv always stay aligned
    if image_row is None:
        print(f'Skipped (no matching image_no in data.csv): {image_name}')
        continue

    save_path = os.path.join(save_dir, str(imgIndex) + ".jpg")
    # The array is RGB (dlib) whereas cv2.imwrite expects BGR, hence the channel swap
    cv2.imwrite(save_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

    saved_images_data.append(image_row)
    imgIndex += 1

saved_images_data_csv = pd.DataFrame(saved_images_data)
saved_images_data_csv.to_csv(saved_data_csv_path, index=False)

# Code Snippet 3

Description: Code used to load and process the data retrieved from the Google Forms

In [168]:
# Note: The NaN values in the CSV file are present because those images were annotated by other users.
# For instance, image 0 was annotated by users 1 & 2 but image 25 was annotated by users 3 & 4, so
# image 0 has NaN values for users 3 & 4 and image 25 has NaN values for users 1 & 2.

import pandas as pd

# Paths to the exported Google Form responses, one CSV file per form
file_path = ['GoogleFormResponses\\Doctor\\Doctor Annotation - 1.csv',
             'GoogleFormResponses\\Doctor\\Doctor Annotation - 2.csv',
             'GoogleFormResponses\\Doctor\\Doctor Annotation - 3.csv',
             'GoogleFormResponses\\Doctor\\Doctor Annotation - 4.csv']

# Image numbering offset between consecutive forms: form k starts at image k * 25
images_per_form = 25

# The renamed DataFrames are collected here and concatenated once after the loop
renamed_parts = []

for form_index, csv_file in enumerate(file_path):
    part_df = pd.read_csv(csv_file)

    # Changing the column names for easier access
    # List of column names
    column_names = part_df.keys()

    # The first column keeps its name; columns 1-3 are renamed to the *_User labels
    column_mapping = {
        column_names[1]: "Gender_User",
        column_names[2]: "Race_User",
        column_names[3]: "Age_User",
    }

    image_number = form_index * images_per_form
    # From the 5th column onwards, every group of three columns belongs to one image
    for column_index in range(4, len(column_names), 3):
        column_mapping[column_names[column_index]] = "Gender_" + str(image_number)
        column_mapping[column_names[column_index + 1]] = "Race_" + str(image_number)
        column_mapping[column_names[column_index + 2]] = "Age_" + str(image_number)
        image_number += 1

    # Rename the columns using the rename method
    renamed_parts.append(part_df.rename(columns=column_mapping))

# Columns that a form does not contain are filled with NaN by the concatenation
df = pd.concat(renamed_parts, ignore_index=True) if renamed_parts else pd.DataFrame()

df

Unnamed: 0,Timestamp,Gender_User,Race_User,Age_User,Gender_0,Race_0,Age_0,Gender_1,Race_1,Age_1,...,Age_93,Gender_94,Race_94,Age_94,Gender_95,Race_95,Age_95,Gender_96,Race_96,Age_96
0,2024/03/09 2:44:44 pm CET,Male,10,Armenia,Male,Latino Hispanic,12.0,Female,Black,54.0,...,,,,,,,,,,
1,2024/03/09 2:47:48 pm CET,Female,78,Austria,Male,Asian,78.0,Male,White,90.0,...,,,,,,,,,,
2,2024/03/09 2:44:44 pm CET,Male,10,Armenia,,,,,,,...,,,,,,,,,,
3,2024/03/09 2:47:48 pm CET,Female,78,Austria,,,,,,,...,,,,,,,,,,
4,2024/03/09 2:44:44 pm CET,Male,10,Armenia,,,,,,,...,,,,,,,,,,
5,2024/03/09 2:47:48 pm CET,Female,78,Austria,,,,,,,...,,,,,,,,,,
6,2024/03/09 2:44:44 pm CET,Male,10,Armenia,,,,,,,...,55.0,Female,Latino Hispanic,100.0,Female,Asian,12.0,Female,Indian,42.0
7,2024/03/09 2:47:48 pm CET,Female,78,Austria,,,,,,,...,31.0,Male,Latino Hispanic,13.0,Female,White,31.0,Male,Latino Hispanic,31.0


In [169]:
# Retrieving the image data; the NaN values are removed for the sake of simplicity
image_set_data = {}

# The number of images is calculated by subtracting 4 (Timestamp/Gender_User/Race_User/Age_User)
# from the total number of columns and then dividing by 3 (Number of labels for each image).
# Integer division is required because range() does not accept a float.
number_of_images = (len(df.keys()) - 4) // 3
for image_index in range(number_of_images):
    # Labels given to this image by the users who annotated it
    image_data = {
        "age": list(df[f"Age_{image_index}"].dropna()),
        "gender": list(df[f"Gender_{image_index}"].dropna()),
        "race": list(df[f"Race_{image_index}"].dropna()),
    }
    image_set_data[image_index] = image_data

image_set_data

{0: {'age': [12.0, 78.0],
  'gender': ['Male', 'Male'],
  'race': ['Latino Hispanic', 'Asian']},
 1: {'age': [54.0, 90.0],
  'gender': ['Female', 'Male'],
  'race': ['Black', 'White']},
 2: {'age': [32.0, 99.0],
  'gender': ['Female', 'Male'],
  'race': ['Asian', 'Black']},
 3: {'age': [66.0, 98.0],
  'gender': ['Female', 'Male'],
  'race': ['Indian', 'White']},
 4: {'age': [31.0, 78.0],
  'gender': ['Male', 'Male'],
  'race': ['White', 'Black']},
 5: {'age': [12.0, 90.0],
  'gender': ['Male', 'Male'],
  'race': ['Black', 'White']},
 6: {'age': [22.0, 89.0],
  'gender': ['Male', 'Male'],
  'race': ['White', 'Latino Hispanic']},
 7: {'age': [22.0, 89.0],
  'gender': ['Male', 'Male'],
  'race': ['Indian', 'Middle Eastern']},
 8: {'age': [44.0, 23.0],
  'gender': ['Male', 'Male'],
  'race': ['Indian', 'Black']},
 9: {'age': [44.0, 42.0],
  'gender': ['Female', 'Male'],
  'race': ['White', 'Black']},
 10: {'age': [44.0, 42.0],
  'gender': ['Female', 'Female'],
  'race': ['Asian', 'Indian']