In [24]:
import os
import pandas as pd
from PIL import Image
import pillow_heif
import shutil

In [18]:
pillow_heif.register_heif_opener()

In [20]:
def convert_heic_to_jpg_inplace(folder_path):
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(".heic"):
            heic_path = os.path.join(folder_path, filename)
            jpg_filename = os.path.splitext(filename)[0] + ".jpg"
            jpg_path = os.path.join(folder_path, jpg_filename)

            try:
                image = Image.open(heic_path)
                image.save(jpg_path, "JPEG")
                print(f"Converted {filename} to {jpg_filename}")
            except Exception as e:
                print(f"Failed to convert {filename}: {e}")

folder = './faces/'
convert_heic_to_jpg_inplace(folder)

Converted ce09a15c-6f82-4ff3-b534-9f984c9ddaa0.HEIC to ce09a15c-6f82-4ff3-b534-9f984c9ddaa0.jpg
Converted 52816630-bee3-4fcc-bee8-b04e80c2b187.HEIC to 52816630-bee3-4fcc-bee8-b04e80c2b187.jpg
Converted f6016d7b-ba32-4bee-8706-984c57c04dc6.HEIC to f6016d7b-ba32-4bee-8706-984c57c04dc6.jpg
Converted 4020052a-d525-48d7-9068-b505633d2cbd.HEIC to 4020052a-d525-48d7-9068-b505633d2cbd.jpg
Converted 093da657-11a5-4a47-b7d3-56a40ee18ffc.HEIC to 093da657-11a5-4a47-b7d3-56a40ee18ffc.jpg
Converted 0f8a349d-3d68-4ea1-84f2-744c68fa8770.HEIC to 0f8a349d-3d68-4ea1-84f2-744c68fa8770.jpg
Converted a227fcba-63ef-4ddc-a8b5-6b21628f063e.HEIC to a227fcba-63ef-4ddc-a8b5-6b21628f063e.jpg
Converted bda95d1b-4075-4548-ad6a-32515336c3dd.HEIC to bda95d1b-4075-4548-ad6a-32515336c3dd.jpg
Converted 1359e613-fec0-4d19-b377-3d941fb5818a.HEIC to 1359e613-fec0-4d19-b377-3d941fb5818a.jpg
Converted 65559dd6-124b-4d6b-a6b5-c2d982dead4b.HEIC to 65559dd6-124b-4d6b-a6b5-c2d982dead4b.jpg
Converted 41948b13-7341-4bf5-86b3-fad9b3

In [21]:
def delete_heic_files(folder_path):
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(".heic"):
            file_path = os.path.join(folder_path, filename)
            try:
                os.remove(file_path)
                print(f"Deleted {filename}")
            except Exception as e:
                print(f"Failed to delete {filename}: {e}")

folder = "./faces/"
delete_heic_files(folder)

Deleted ce09a15c-6f82-4ff3-b534-9f984c9ddaa0.HEIC
Deleted 52816630-bee3-4fcc-bee8-b04e80c2b187.HEIC
Deleted f6016d7b-ba32-4bee-8706-984c57c04dc6.HEIC
Deleted 4020052a-d525-48d7-9068-b505633d2cbd.HEIC
Deleted 093da657-11a5-4a47-b7d3-56a40ee18ffc.HEIC
Deleted 0f8a349d-3d68-4ea1-84f2-744c68fa8770.HEIC
Deleted a227fcba-63ef-4ddc-a8b5-6b21628f063e.HEIC
Deleted bda95d1b-4075-4548-ad6a-32515336c3dd.HEIC
Deleted 1359e613-fec0-4d19-b377-3d941fb5818a.HEIC
Deleted 65559dd6-124b-4d6b-a6b5-c2d982dead4b.HEIC
Deleted 41948b13-7341-4bf5-86b3-fad9b3deb97d.HEIC
Deleted 71020311-f484-41ae-9918-403d9f921c57.HEIC
Deleted 57a11d51-80ec-41ca-b4c0-d73730fe9fe8.HEIC
Deleted 8d8265f6-ac84-4776-b991-0374139a41c4.HEIC
Deleted e66c5e48-1ebd-4290-adea-b8252611fa71.HEIC
Deleted 07240d14-12fd-464d-b66c-25d0df592650.HEIC
Deleted b2a2d684-4043-488f-8e00-b386ae9de62e.HEIC
Deleted dc8aa4cf-4610-4abc-a93d-7a5dafda1b69.HEIC
Deleted b6535354-c500-47bd-8118-bfc6cee6e635.HEIC
Deleted 1bed3b95-10b1-43eb-b0e2-31a5fa122822.HEIC


In [23]:
import os

def check_matching_files(json_dir, image_dir):
    # Get sets of participant IDs from filenames (strip extensions)
    json_ids = {os.path.splitext(f)[0] for f in os.listdir(json_dir) if f.endswith('.json')}
    image_ids = {os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.heic'))}

    # Find mismatches
    json_missing_for_images = image_ids - json_ids
    images_missing_for_json = json_ids - image_ids
    matched_ids = json_ids & image_ids  # Intersection = matched

    print(f"Number of JSON files: {len(json_ids)}")
    print(f"Number of face images: {len(image_ids)}")
    print(f"Number of matches: {len(matched_ids)}")

    if not json_missing_for_images and not images_missing_for_json:
        print("All JSON files have matching face images, and vice versa!")
    else:
        if json_missing_for_images:
            print("Face image(s) missing corresponding JSON file(s):")
            for missing_id in sorted(json_missing_for_images):
                print(f" - {missing_id}")
        if images_missing_for_json:
            print("JSON file(s) missing corresponding face image(s):")
            for missing_id in sorted(images_missing_for_json):
                print(f" - {missing_id}")

# Example usage:
json_folder = "user_data"
faces_folder = "faces"
check_matching_files(json_folder, faces_folder)

Number of JSON files: 60
Number of face images: 58
Number of matches: 52
Face image(s) missing corresponding JSON file(s):
 - 035efbec-f5c6-415f-bff1-88b9cc1b976c
 - 0f8a349d-3d68-4ea1-84f2-744c68fa8770
 - 1820dea5-0695-4f1d-ac2d-1529eb970a2f
 - 41948b13-7341-4bf5-86b3-fad9b3deb97d
 - 517eb3a9-9067-4dd9-b730-e493bef2fe35
 - c95cea3d-f740-41d5-8519-5fcaff1733e4
JSON file(s) missing corresponding face image(s):
 - 06760fcf-a992-4250-be07-aed0ba857cf4
 - 4b39387f-2baf-49ad-8ebd-eb641a3dd42e
 - 5a5a3daa-37bd-4262-957d-dcb7a4ec2a4d
 - 9130b0bc-f675-48ba-b7ef-190a278a4d24
 - 962e6320-6039-4d40-b866-0f4b7521d6db
 - c086b229-6ffa-487c-ba1f-5c1634eed5f3
 - f818a43f-79f3-43ef-8384-3ae2e3cb219f
 - fb8201f2-51d8-4fcd-9a38-f0294376d9e7


In [25]:
def move_unlinked_files(json_dir, image_dir, unlinked_dir):
    # Create unlinked_data folder if it doesn't exist
    os.makedirs(unlinked_dir, exist_ok=True)

    json_ids = {os.path.splitext(f)[0]: f for f in os.listdir(json_dir) if f.endswith('.json')}
    image_ids = {os.path.splitext(f)[0]: f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.heic'))}

    json_id_set = set(json_ids.keys())
    image_id_set = set(image_ids.keys())

    # Files missing matches
    json_missing_for_images = image_id_set - json_id_set
    images_missing_for_json = json_id_set - image_id_set

    matched_ids = json_id_set & image_id_set

    print(f"Number of JSON files: {len(json_id_set)}")
    print(f"Number of face images: {len(image_id_set)}")
    print(f"Number of matches: {len(matched_ids)}")

    # Move face images missing JSON
    if json_missing_for_images:
        print(f"Moving {len(json_missing_for_images)} face images missing JSON files...")
        for missing_id in json_missing_for_images:
            src_path = os.path.join(image_dir, image_ids[missing_id])
            dst_path = os.path.join(unlinked_dir, image_ids[missing_id])
            shutil.move(src_path, dst_path)
            print(f"Moved image file: {image_ids[missing_id]}")

    # Move JSON files missing face images
    if images_missing_for_json:
        print(f"Moving {len(images_missing_for_json)} JSON files missing face images...")
        for missing_id in images_missing_for_json:
            src_path = os.path.join(json_dir, json_ids[missing_id])
            dst_path = os.path.join(unlinked_dir, json_ids[missing_id])
            shutil.move(src_path, dst_path)
            print(f"Moved JSON file: {json_ids[missing_id]}")

    if not json_missing_for_images and not images_missing_for_json:
        print("All files are properly linked.")

# Example usage:
json_folder = "user_data"
faces_folder = "faces"
unlinked_folder = "unlinked_data"

move_unlinked_files(json_folder, faces_folder, unlinked_folder)

Number of JSON files: 60
Number of face images: 58
Number of matches: 52
Moving 6 face images missing JSON files...
Moved image file: 0f8a349d-3d68-4ea1-84f2-744c68fa8770.jpg
Moved image file: 517eb3a9-9067-4dd9-b730-e493bef2fe35.jpg
Moved image file: 41948b13-7341-4bf5-86b3-fad9b3deb97d.jpg
Moved image file: 1820dea5-0695-4f1d-ac2d-1529eb970a2f.jpg
Moved image file: 035efbec-f5c6-415f-bff1-88b9cc1b976c.jpg
Moved image file: c95cea3d-f740-41d5-8519-5fcaff1733e4.jpeg
Moving 8 JSON files missing face images...
Moved JSON file: fb8201f2-51d8-4fcd-9a38-f0294376d9e7.json
Moved JSON file: 4b39387f-2baf-49ad-8ebd-eb641a3dd42e.json
Moved JSON file: 5a5a3daa-37bd-4262-957d-dcb7a4ec2a4d.json
Moved JSON file: 9130b0bc-f675-48ba-b7ef-190a278a4d24.json
Moved JSON file: c086b229-6ffa-487c-ba1f-5c1634eed5f3.json
Moved JSON file: 962e6320-6039-4d40-b866-0f4b7521d6db.json
Moved JSON file: f818a43f-79f3-43ef-8384-3ae2e3cb219f.json
Moved JSON file: 06760fcf-a992-4250-be07-aed0ba857cf4.json


In [None]:
import json

def load_full_ranking_df(json_folder):
    ranking_data = []
    for filename in os.listdir(json_folder):
        if filename.endswith('json'):
            filepath = os.path.join(json_folder, filename)
            with open(filepath, 'r') as f:
                data = json.load(f)
            user_id = data.get('user_id')
            ranking_list = data.get('ranking', [])

            ranking_entry = {'user_id': user_id}

