# Turing test image selection

This notebook randomly selects images from the dataset, crops and brightness-normalizes them, and reconstructs them to form the Turing test dataset.

## Helper functions

In [6]:
import os
import pickle as pkl
import random
import re
import shutil

import cv2
import numpy as np
import pandas as pd
import torch

import dnnlib
import legacy


def resize_crop(img_dir, resize_by=1., resolution=512, brightness_norm=True, brightness_mean=107.2, locations=None):
    """Load an image, optionally rescale it, and cut square crops out of it.

    Parameters
    ----------
    img_dir : str
        Path of the image file to read (a file path, despite the name).
    resize_by : float
        Scale factor applied to both axes before cropping; 1 skips resizing.
    resolution : int
        Side length in pixels of every square crop.
    brightness_norm : bool
        If True, shift each crop so that its mean intensity becomes ``brightness_mean``.
    brightness_mean : float
        Target mean intensity used when ``brightness_norm`` is True.
    locations : list of str, optional
        One crop is produced per entry: "left" takes the first ``resolution`` columns,
        "right" the last ``resolution`` columns, and any other value the horizontally
        centred columns. Rows are always vertically centred. Defaults to ["left", "right"].

    Returns
    -------
    list of numpy.ndarray
        The crops, in the order of ``locations``.

    Raises
    ------
    OSError
        If OpenCV cannot read the file.
    ValueError
        If the (resized) image is smaller than ``resolution`` along either axis.
    """
    if locations is None:
        locations = ["left", "right"]
    img = cv2.imread(img_dir, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"cv2.imread could not read image (missing or unreadable file): {img_dir}")
    if img.dtype != np.uint8:
        # Presumably 16-bit input being scaled down to 8 bits -- TODO confirm against the dataset.
        img = np.uint8(img / 256)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img.shape[:2]
    new_width, new_height = int(width * resize_by), int(height * resize_by)
    if resize_by != 1:
        # The third positional argument of cv2.resize is `dst`, so the interpolation flag
        # has to be passed by keyword; dsize must be a (width, height) tuple of ints.
        img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
    if new_height < resolution or new_width < resolution:
        raise ValueError(
            f"Image {img_dir} is {new_width}x{new_height} after resizing, "
            f"smaller than the requested {resolution}x{resolution} crop")

    # Every crop uses the same vertically centred band of rows.
    rows = slice((new_height - resolution) // 2, (new_height + resolution) // 2)
    imgs = []
    for location in locations:
        if location == "left":
            cols = slice(None, resolution)
        elif location == "right":
            cols = slice(-resolution, None)
        else:
            cols = slice((new_width - resolution) // 2, (new_width + resolution) // 2)
        new_img = img[rows, cols]
        if brightness_norm:
            # Shift the crop so its mean lands on brightness_mean (cv2.add saturates for uint8).
            # NOTE(review): a scalar passed to cv2.add is applied per OpenCV's Scalar rules;
            # this assumes single-channel images -- confirm if colour inputs are expected.
            value = brightness_mean - np.mean(new_img)
            new_img = cv2.add(new_img, value)
        imgs.append(new_img)
    return imgs


# Randomly select images

In [7]:
ROOT_DIR = "/home/xavier/Documents/dataset/Welch/trainingset2/trainingset2"
LABEL_DIR = "/home/xavier/Documents/dataset/Welch/trainingset2/InceptionV3-labels.pkl"
samples = 30
# timepoint_pool = [60] * 1 + [600] * 2 + [960] * 3 + [1440] * 4

# NOTE: unpickling can execute arbitrary code; only load label files from a trusted source.
# The context manager closes the file handle, which a bare open() inside pkl.load() leaks.
with open(LABEL_DIR, 'rb') as label_file:
    label_dict = pkl.load(label_file)

# Patterns are loop-invariant, so define them once. The dot before "jpg" is escaped so it
# only matches a literal ".", consistent with the file-name pattern used further down.
scope_pattern = r"Scope(\d{2})/"
index_pattern = r"_(\d{4})\.jpg"

# Flatten the label dictionary into one (class, strain, scope number, image index) row per file.
class_run_scope_idx = []
for class_label in label_dict:
    for file_string in label_dict[class_label]:
        # The strain is the first component of the relative path.
        strain = file_string.split('/')[0]
        scope_found = re.search(scope_pattern, file_string)
        index_found = re.search(index_pattern, file_string)
        if scope_found is None or index_found is None:
            raise ValueError(f"Unexpected label path format: {file_string!r}")
        scope_match = int(scope_found.group(1))
        index_match = int(index_found.group(1))
        class_run_scope_idx.append((class_label, strain, scope_match, index_match))
class_run_scope_idx = pd.DataFrame(class_run_scope_idx, columns=["Class", "Strain", "Scope", "Index"])

# One row per strain folder: the strain name, the scope numbers found on disk (padded with
# None up to three), and a printf-style file-name template derived from one example image.
annotations = []
pattern = r"(_scope)(\d{1,2})(-00_)(\d{4})(\.jpg)"
template_replacement = r"\g<1>" + "%d" + r"\3" + "%04d" + r"\5"
for strain in os.listdir(ROOT_DIR):
    strain_dir = os.path.join(ROOT_DIR, strain)
    # Scope folders end in a two-digit scope number.
    scopes = [int(folder[-2:]) for folder in os.listdir(strain_dir)]
    first_scope_dir = os.path.join(strain_dir, f"Scope{scopes[0]:02d}")
    example_file = os.listdir(first_scope_dir)[0]
    # Swap the concrete scope number and image index for %d / %04d placeholders.
    img_name = re.sub(pattern, template_replacement, example_file)
    scopes.extend([None] * (3 - len(scopes)))
    annotations.append([strain, *scopes, img_name])

df = pd.DataFrame(annotations, columns=['Strain', 'Scope1', 'Scope2', 'Scope3', "Name"])
# Nullable integers keep missing scopes as <NA> instead of turning the columns into floats.
for scope_column in ('Scope1', 'Scope2', 'Scope3'):
    df[scope_column] = df[scope_column].astype('Int64')

# Strains with a second scope recorded, i.e. at least two scopes.
filtered_df = df[df['Scope2'].notna()]

# Keep only labelled images that belong to those strains.
has_two_scopes = class_run_scope_idx['Strain'].isin(filtered_df['Strain'])
class_run_scope_idx = class_run_scope_idx[has_two_scopes]

# Pick up to `samples` image pairs. Each pair is a randomly drawn labelled image plus the
# image with the same index recorded on a different scope of the same strain.
# NOTE: DataFrame.sample and random.choice are unseeded here, so each run selects different pairs.
final_directory_pairs = []
selected_strains = set()

# Cycle over the class labels that actually occur rather than assuming labels 0..12:
# with a hard-coded modulus, rows carrying any other label could never be drawn or removed,
# and the loop would spin forever once the remaining classes were exhausted.
class_labels = sorted(class_run_scope_idx["Class"].unique())
class_position = 0
while len(final_directory_pairs) < samples and not class_run_scope_idx.empty:
    # Move through the classes in a round-robin fashion.
    class_label = class_labels[class_position % len(class_labels)]
    class_position += 1

    current_class_samples = class_run_scope_idx[class_run_scope_idx["Class"] == class_label]
    if current_class_samples.empty:
        continue  # this class has no eligible images left; try the next one

    # Randomly select a sample from the current class.
    sample_row = current_class_samples.sample(n=1).iloc[0]
    strain, scope1, index = sample_row["Strain"], sample_row["Scope"], sample_row["Index"]

    # Look up the other scopes recorded for this strain.
    run_info = df[df["Strain"] == strain].iloc[0]
    valid_scopes = [option for option in [run_info["Scope1"], run_info["Scope2"], run_info["Scope3"]] if
                    not pd.isna(option) and option != scope1]

    pair = None
    if valid_scopes:
        scope2 = random.choice(valid_scopes)
        file1 = f"{ROOT_DIR}/{strain}/Scope{scope1:02d}/{run_info['Name'] % (scope1, index)}"
        file2 = f"{ROOT_DIR}/{strain}/Scope{scope2:02d}/{run_info['Name'] % (scope2, index)}"
        missing = [path for path in (file1, file2) if not os.path.exists(path)]
        if missing:
            # A pair with a missing file cannot be loaded by the cropping step, so report it
            # and leave it out instead of appending it.
            print("Skipping pair with missing file(s):", *missing)
        else:
            pair = [file1, file2]

    if pair is None:
        # Drop only the unusable row; dropping it guarantees the loop makes progress.
        class_run_scope_idx = class_run_scope_idx.drop(index=sample_row.name)
        continue

    final_directory_pairs.append(pair)
    selected_strains.add(strain)
    # Remove the selected strain from all classes so each strain is used at most once.
    class_run_scope_idx = class_run_scope_idx[class_run_scope_idx["Strain"] != strain]

# For alignment test
## Get images and reconstructions

In [8]:
# Root folder that receives the crops and reconstructions written by this section.
OUT_DIR = "/home/xavier/PycharmProjects/TuringTest/fidelity"

# Pin the C/C++ compilers via the environment.
# NOTE(review): presumably needed for building the StyleGAN2 custom ops -- confirm.
os.environ['CC'] = "/usr/bin/gcc-9"
os.environ['CXX'] = "/usr/bin/g++-9"
# Use the GPU when one is available; fall back to the CPU instead of failing later on `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Maps a model identifier (used in the output folder name) to its network snapshot.
# Commented-out entries are alternative snapshots that are currently disabled.
model_dict = {
    # 7: "/home/xavier/PycharmProjects/training-runs/new/e7/00001-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-001461.pkl",
    # 10: "/home/xavier/PycharmProjects/training-runs/new/e10/00001-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-001411.pkl",
    # 12: "/home/xavier/PycharmProjects/training-runs/new/e12/00008-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-001461.pkl",
    13: "/home/xavier/PycharmProjects/training-runs/new/e13/00008-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-001461.pkl",
    # 14: "/home/xavier/PycharmProjects/training-runs/new/e14/00010-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-001461.pkl",
    # 18: "/home/xavier/PycharmProjects/training-runs/new/e18/00001-stylegan2-trainingset2-gpus4-batch96-gamma10/network-snapshot-001461.pkl",
}
os.makedirs(OUT_DIR, exist_ok=True)


def save_image(image, container, filename):
    """Write ``image`` to ``OUT_DIR/container/filename``, creating the folder if needed.

    Parameters
    ----------
    image : numpy.ndarray
        Image data handed to ``cv2.imwrite``.
    container : str
        Sub-folder of ``OUT_DIR`` that receives the file.
    filename : str
        Name of the file to write; its extension selects the encoding.

    Raises
    ------
    OSError
        If ``cv2.imwrite`` reports that the file was not written.
    """
    path = os.path.join(OUT_DIR, container)
    # exist_ok avoids the check-then-create race and matches how OUT_DIR itself is created.
    os.makedirs(path, exist_ok=True)
    target = os.path.join(path, filename)
    # cv2.imwrite returns False on failure instead of raising, so check it explicitly.
    if not cv2.imwrite(target, image):
        raise OSError(f"cv2.imwrite failed to write {target}")


# For every selected pair: left and right crops from the first image, a centre crop from the
# second. Each crop is saved to disk and stored with two leading singleton axes.
imgs_orig, imgs_other, imgs_rep = [], [], []
for first_path, second_path in final_directory_pairs:
    first_name = os.path.basename(first_path)
    second_name = os.path.basename(second_path)

    left_crop, right_crop = resize_crop(first_path)
    center_crop = resize_crop(second_path, locations=['center'])[0]

    save_image(left_crop, 'crops_l', first_name)
    save_image(right_crop, 'crops_r', first_name)
    save_image(center_crop, 'crops_cp', second_name)

    for collection, crop in ((imgs_orig, left_crop), (imgs_other, right_crop), (imgs_rep, center_crop)):
        collection.append(crop[np.newaxis, np.newaxis, :, :])

loss_all = {}
# Encode each left crop with every configured model, decode it again, and save the reconstruction.
# NOTE(review): `loss_all` and `similarities` are initialised but never filled in this cell;
# they are kept in case later cells rely on them -- confirm or remove.
for model_e, model_path in model_dict.items():
    with dnnlib.util.open_url(model_path) as fp:
        models = legacy.load_network_pkl(fp)
    E, G = models['E_ema'].to(device), models['G_ema'].to(device)

    loss_all[model_e] = 0
    similarities = []
    subdir = f'dim_{model_e}_reconstructions'
    # Pure inference: disable autograd so no computation graph is recorded.
    with torch.no_grad():
        for img_l, (img_name, _) in zip(imgs_orig, final_directory_pairs):
            # Map pixel values from [0, 255] to [-1, 1].
            img_l_tensor = torch.tensor(img_l, device=device, dtype=torch.float32).div(127.5).sub(1)
            # Only the mean is used for reconstruction; the log-variance is discarded.
            mu, _logvar = E.mu_var(img_l_tensor, None)
            recon = G(mu, None).detach()
            # Map the generator output from [-1, 1] back to 8-bit pixel values.
            recon_clipped = torch.clip(recon, -1, 1)
            recon_rescaled = recon_clipped.add(1).div(2).mul(255).type(torch.uint8)
            # Take the first image and first channel of the output tensor.
            recon_output = recon_rescaled.cpu().numpy()[0, 0]
            save_image(recon_output, subdir, os.path.basename(img_name))


In the next step, use `project_all.sh` in stylegan2-ada-pytorch to get the other reconstruction (the `proj.png` files that the following cell collects).

In [12]:
PROJECT_DIR = "/home/xavier/PycharmProjects/TuringTest/new/crops_bp"
COLLECT_DIR = "/home/xavier/PycharmProjects/TuringTest/new/crops_stylegan2"
os.makedirs(COLLECT_DIR, exist_ok=True)

# Copy each sub-folder's proj.png into COLLECT_DIR, named after the sub-folder.
for folder in os.listdir(PROJECT_DIR):
    proj_path = os.path.join(PROJECT_DIR, folder, "proj.png")
    if not os.path.isfile(proj_path):
        # Stray files or folders without a projection would otherwise abort the whole copy.
        print(f"Skipping {folder}: no proj.png found")
        continue
    shutil.copy(proj_path, os.path.join(COLLECT_DIR, f"{folder}.png"))


# Convert image labels to embeddings

In [5]:
# File path to your .xlsx file
file_path = '/home/xavier/PycharmProjects/TuringTest/new/Qualtrics_labels.xlsx'
# Destination of the combined sheet and the number of rows sampled into it
output_path = '/home/xavier/PycharmProjects/TuringTest/new/combined_labels30.xlsx'
n_samples = 30


def to_img_tag(label):
    """Wrap a label value in the HTML <img> tag that points at the Qualtrics graphic."""
    return f'<img src="https://riceuniversity.co1.qualtrics.com/ControlPanel/Graphic.php?IM={label}" style="width:256px;height:256px;"/>'


# Initialize a list to hold data from each sheet
sheets_data = []

# Open the Excel file; the context manager closes the workbook once every sheet is read
with pd.ExcelFile(file_path) as xls:
    # Iterate over each sheet in the Excel file
    for sheet_position, sheet_name in enumerate(xls.sheet_names):
        # Read the current sheet
        sheet_data = pd.read_excel(xls, sheet_name)

        # Assuming the image column is named 'Image' and label column is 'Label'
        # Adjust column names as per your file
        image_column = sheet_data['Image']
        label_column = sheet_data['Label']

        # Transform the label column to the desired format
        transformed_label = label_column.apply(to_img_tag)

        # If it's the first sheet, keep the image column; else, only keep the transformed label
        columns = {}
        if sheet_position == 0:
            columns['Image'] = image_column
        columns[f'Label_{sheet_name}'] = transformed_label
        sheets_data.append(pd.DataFrame(columns))

# Concatenate all dataframes horizontally, assuming images are in the same order across sheets
full_sheet_data = pd.concat(sheets_data, axis=1)

# If there are duplicate column names, drop them except the first occurrence
full_sheet_data = full_sheet_data.loc[:, ~full_sheet_data.columns.duplicated()]

# Randomly keep n_samples rows (unseeded, so the selection differs between runs)
full_sheet_data = full_sheet_data.sample(n=n_samples)
# Save the full sheet data to a new Excel file
full_sheet_data.to_excel(output_path, index=False)

# Report the file that was actually written
print(f'Conversion complete. The combined labels sheet has been saved as "{os.path.basename(output_path)}".')

# Get 30 samples