In [1]:
import glob
import torchvision
import hashlib
import json
import multiprocessing
import os
import shutil
from multiprocessing import Pool
from tqdm.notebook import trange, tqdm

import eye_detector
from IPython.display import display
import torchvision.transforms.functional as TF
import torch

In [2]:
# Root directory under which every generated artifact is written
# (metadata/, face-detected/, face-cropped/, face-resized/, noface/).
base_dir = "/mnt/wsl/PHYSICALDRIVE3p2/processed_dataset"
# Recursive glob pattern selecting the input images: any file ending in
# ".jpg" (in any letter case) anywhere below the datasets directory.
kwd = "/mnt/wsl/PHYSICALDRIVE3p1/datasets/**/*.[jJ][pP][gG]"
# Edge length of the square that each cropped face is resized to before it is
# converted to a tensor and saved.
face_size=256

In [3]:
def get_file_hash(filename):
    """Return the hex MD5 digest of a file *path* (not of the file's contents).

    The path string is UTF-8 encoded and hashed; the resulting 32-character
    hex string is used as a stable identifier for the file.
    """
    encoded_path = filename.encode("utf-8")
    return hashlib.md5(encoded_path).hexdigest()

In [4]:
def is_small_img(img, thres):
    """Tell whether an image is smaller than ``thres`` along either dimension.

    ``img`` only needs ``width`` and ``height`` attributes. Returns True when
    the shorter side is strictly below ``thres``.
    """
    shortest_side = min(img.width, img.height)
    return shortest_side < thres

In [5]:
def _load_people(orig_metadata_filename):
    """Best-effort read of the named people from an image's JSON sidecar file.

    The sidecar is expected to hold a ``"people"`` list whose entries have a
    ``"name"`` key; entries whose name is ``None`` are dropped.

    Returns:
        A list of names. An empty list is returned when the sidecar does not
        exist or cannot be read/parsed, so a bad sidecar never aborts the
        processing of the image itself.
    """
    try:
        with open(orig_metadata_filename, "r") as f:
            j = json.load(f)
        people = [person["name"] for person in j["people"] if person["name"] is not None]
        if len(people) == 0 and len(j["people"]) != 0:
            print("unknown person: ", j["people"])
        return people
    except FileNotFoundError:
        # No sidecar for this image: nothing to report.
        return []
    except Exception as e:
        # Malformed or unreadable sidecar: stay best-effort, but say so instead
        # of hiding it. `Exception` (unlike a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("failed to read people from {}: {!r}".format(orig_metadata_filename, e))
        return []


def process_and_save(filename, min_detected_size=300, min_face_size=100):
    """Detect faces in one image at four rotations and write all derived files.

    The image is identified by the hash of its path. If
    ``<base_dir>/metadata/<id>.json`` already exists the image is treated as
    done and nothing happens. Otherwise, for each angle in 0/90/180/270:

    * every sufficiently large detected-face image is saved to
      ``face-detected/``;
    * every sufficiently large cropped face is saved to ``face-cropped/``, and
      a ``face_size`` x ``face_size`` tensor version to ``face-resized/``; its
      bounding box, eye centres and angle are recorded in the metadata.

    If no detected-face image was saved at any angle, a symlink to the source
    image is placed in ``noface/``. Finally the metadata JSON is written.

    NOTE(review): ``eye_detector.process`` is project-local. From its use here
    it returns ``(raw_image, faces, face_images)``, where the images offer
    ``width``/``height``/``save``/``resize`` (PIL-like) and each face is
    ``(image, four corner points, left eye centre, right eye centre)`` --
    confirm against ``eye_detector``.

    Args:
        filename: Path of the source image.
        min_detected_size: Detected-face images with a side below this are skipped.
        min_face_size: Cropped faces with a side below this are skipped.
    """
    file_id = get_file_hash(filename)
    metadata_filename = "{}/metadata/{}.json".format(base_dir, file_id)
    if os.path.exists(metadata_filename):
        # The metadata file doubles as the "already processed" marker.
        return

    people = _load_people(filename + ".json")

    faces_meta = []
    no_face = True
    for angle in (0, 90, 180, 270):
        (_, faces, face_images) = eye_detector.process(filename, angle)
        for faceno, face_detected_img in enumerate(face_images):
            if is_small_img(face_detected_img, min_detected_size):
                continue
            face_detected_filename = "{}/face-detected/{}-{}-{:02x}.png".format(base_dir, file_id, angle, faceno)
            face_detected_img.save(face_detected_filename, format="png", compress_level=1)
            # Only a saved detected-face image counts as "a face was found".
            no_face = False
        for faceno, (accurate_face, raw_points, left_eye_center, right_eye_center) in enumerate(faces):
            if is_small_img(accurate_face, min_face_size):
                continue
            faces_meta.append(
                {
                    "bbox": {
                        "ltx": raw_points[0][0], "lty": raw_points[0][1],
                        "lbx": raw_points[1][0], "lby": raw_points[1][1],
                        "rbx": raw_points[2][0], "rby": raw_points[2][1],
                        "rtx": raw_points[3][0], "rty": raw_points[3][1],
                    },
                    "eyes": {
                        "lx": left_eye_center[0], "ly": left_eye_center[1],
                        "rx": right_eye_center[0], "ry": right_eye_center[1],
                    },
                    "angle": angle,
                }
            )

            face_filename = "{}/face-cropped/{}-{}-{:02x}.png".format(base_dir, file_id, angle, faceno)
            accurate_face.save(face_filename, format="png", compress_level=1)
            resized_filename = "{}/face-resized/{}-{}-{:02x}.pt".format(base_dir, file_id, angle, faceno)
            resized = TF.to_tensor(accurate_face.resize((face_size, face_size)))
            torch.save(resized, resized_filename)

    if no_face:
        noface_image_filename = "{}/noface/{}.jpg".format(base_dir, file_id)
        try:
            os.symlink(filename, noface_image_filename)
        except FileExistsError:
            # Left over from a run interrupted before the metadata was written.
            # The link name is derived from this same path, so it already
            # points at this file.
            pass

    metadata = {"id": file_id, "filename": filename, "faces": faces_meta, "people": people}

    # Write to a temporary name and rename, so the "processed" marker never
    # exists in a half-written state if the worker is killed mid-dump.
    tmp_metadata_filename = metadata_filename + ".tmp"
    with open(tmp_metadata_filename, "w") as fd:
        json.dump(metadata, fd)
    os.replace(tmp_metadata_filename, metadata_filename)

In [6]:
def recreate_dir(dirname):
    """Make ``<base_dir>/<dirname>`` exist as an empty directory.

    Any existing directory tree at that location is deleted first. Unlike a
    plain rmtree + mkdir, this also works when the directory (or ``base_dir``
    itself) does not exist yet, so it can be used to bootstrap a fresh output
    location.

    Args:
        dirname: Name of the sub-directory, relative to ``base_dir``.
    """
    dirname = "{}/{}".format(base_dir, dirname)
    # rmtree raises on a missing path; only remove what is actually there.
    if os.path.isdir(dirname):
        shutil.rmtree(dirname)
    os.makedirs(dirname, exist_ok=True)

In [7]:
def delete_all_file():
    """Wipe all generated outputs by calling ``recreate_dir`` on each output directory."""
    output_dirs = (
        "metadata",
        "face-detected",
        "noface",
        "face-cropped",
        "face-resized",
    )
    for output_dir in output_dirs:
        recreate_dir(output_dir)

# delete_all_file()

In [8]:
# Number of worker processes in the multiprocessing Pool that runs process_and_save.
cpu_count = 4

In [9]:
# Collect every input image, then fan the work out over a process pool.
# Results arrive in completion order; the bar advances once per finished file.
files = glob.glob(kwd, recursive=True)
with tqdm(total=len(files)) as progress, Pool(cpu_count) as pool:
    for _ in pool.imap_unordered(process_and_save, files):
        progress.update()

  0%|          | 0/43881 [00:00<?, ?it/s]



In [None]:
# Ad-hoc serial run of process_and_save over a fixed slice of the matched
# files (indices 425..435), executed in this process instead of the pool.
# NOTE(review): presumably left over from debugging specific inputs. The slice
# only selects the same files if glob returns them in the same order each
# time -- confirm before relying on it.
for unz in glob.glob(kwd, recursive=True)[425:436]:
    process_and_save(unz)