In [1]:
import pytesseract
import cv2
import glob
import os
import fnmatch
from multiprocessing.pool import ThreadPool
import multiprocessing
from tqdm import tqdm
import csv

In [2]:
def find_folders(pattern, path):
    """Recursively collect sub-directories of ``path`` whose name matches ``pattern``.

    Args:
        pattern: Shell-style wildcard (``fnmatch`` syntax) tested against each
            directory *name*, not its full path.
        path: Root directory to walk.

    Returns:
        list[str]: Full paths (walk root joined with the directory name) of
        every matching directory, in ``os.walk`` order.
    """
    matches = []
    # tqdm wraps the walk only to show progress; it yields the same tuples.
    for parent, subdirs, _files in tqdm(os.walk(path)):
        matches.extend(
            os.path.join(parent, subdir)
            for subdir in subdirs
            if fnmatch.fnmatch(subdir, pattern)
        )
    return matches

def get_timestamps(images):
    """OCR the timestamp overlay of every image using a thread pool.

    Args:
        images: Sequence of image file paths. ``len(images)`` is used for the
            progress-bar total, so it must be a sized collection.

    Returns:
        list[tuple[str, str]]: One ``(basename, timestamp)`` tuple per input
        path, in input order. ``timestamp`` is the recognised text with the
        ``"TLC130 "`` prefix and surrounding whitespace removed, or ``""``
        when the image file could not be read.
    """

    def process_image(image):
        """Return ``(basename, timestamp)`` for a single image path."""
        # Bottom 32-pixel strip starting at column 760.
        # NOTE(review): the literals assume 1920x1080 frames — confirm.
        x, y, w, h = 760, 1080 - 32, 1920 - 760, 32
        im = cv2.imread(image)
        if im is None:
            # cv2.imread returns None (it does not raise) for missing or
            # undecodable files. Without this guard the slice below raises
            # TypeError and aborts the whole pool run for one bad file.
            return (os.path.basename(image), "")
        # Just get the timestamp section of image
        ROI = im[y : y + h, x : x + w]
        # Invert colours
        ROI = cv2.bitwise_not(ROI)
        # Add whitespace around text (improves accuracy)
        ROI = cv2.copyMakeBorder(
            ROI, 15, 15, 0, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255]
        )
        unclean_timestamp = pytesseract.image_to_string(ROI, lang='eng')
        clean_timestamp = unclean_timestamp.replace("TLC130 ", "").strip()

        return (os.path.basename(image), clean_timestamp)

    # One worker thread per CPU; imap yields results in input order, so the
    # returned list lines up with `images`.
    with ThreadPool(multiprocessing.cpu_count()) as p:
        data = list(tqdm(p.imap(process_image, images), total=len(images)))

    return data

In [3]:
# Root of the participant data tree. In the cells shown here it is only
# referenced by the disabled find_folders() discovery call noted below.
starting_folder = "/mnt/z/Square_Eyes_DP20_Data/Validation Study/Participant Data"

# Folders whose *.jpg frames are processed. Currently pinned to one test folder.
# To discover every matching folder under `starting_folder` instead, use:
#     image_folders = find_folders("*images*", starting_folder)
image_folders = ["/mnt/MBData/Square_Eyes_DP20_Data/Test Files/Model Test/0001/Images/images"]

In [17]:
import numpy as np
def extract_timestamp(image_path: str) -> str:
    """OCR the timestamp overlay from a single image file.

    Args:
        image_path: Path of the image to read.

    Returns:
        The recognised text with the ``"TLC130 "`` prefix and surrounding
        whitespace removed. Returns ``""`` when the image cannot be read or
        when the recognised text does not start with ``"TLC130"``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for missing or
        # undecodable files. Report "no timestamp" instead of crashing on the
        # slice below, matching the other failure path of this function.
        return ""

    # NOTE(review): get_timestamps() crops a width of 1920 - 760 = 1160 for the
    # same strip; 1048 here equals the y offset — confirm the narrower crop is
    # intended and not a copy/paste slip.
    x, y, w, h = 760, 1048, 1048, 32
    # Just get the timestamp section of image
    roi = image[y : y + h, x : x + w]
    # Invert colours
    roi = cv2.bitwise_not(roi)
    # Add whitespace around text (improves accuracy)
    roi = cv2.copyMakeBorder(
        roi, 15, 15, 0, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255]
    )
    unclean_timestamp = pytesseract.image_to_string(roi, lang="eng")
    # Text without the expected prefix is treated as a failed read.
    if not unclean_timestamp.startswith("TLC130"):
        return ""
    clean_timestamp = unclean_timestamp.replace("TLC130 ", "").strip()

    return clean_timestamp

In [15]:
# Path of a single frame from the test image folder, kept for manual checks.
test_image = "/mnt/MBData/Square_Eyes_DP20_Data/Test Files/Model Test/0001/Images/images/4002_TLC00001_00004.jpg"


In [16]:
# Look up the values of `image` at the points listed in `black_coords`:
# column 1 of `black_coords` is used as the first index and column 0 as the
# second (presumably row = y, column = x — TODO confirm).
# NOTE(review): no visible cell defines `black_coords` or an array-valued
# `image` (the visible `image` names are path strings), so this cell appears
# to rely on leftover kernel state — confirm where they came from.
black_pixels = image[black_coords[:, 1], black_coords[:, 0]]
black_pixels

array([[16, 16, 16],
       [16, 16, 16],
       [16, 16, 16],
       [17, 15, 14]], dtype=uint8)

In [18]:
# For every folder: OCR each *.jpg and write <folder>/timestamps.csv with one
# (image, timestamp) row per file.
for image_folder in image_folders:
    # glob's ordering depends on the file system; sort so the CSV rows come
    # out in a stable, filename order on every run.
    images = sorted(glob.glob(os.path.join(image_folder, "*.jpg")))
    out_data = []
    for image in images:
        timestamp = extract_timestamp(image)
        out_data.append((os.path.basename(image), timestamp))
    # newline="" is what the csv module requires of its file object (otherwise
    # extra blank lines can appear on some platforms). Explicit UTF-8 keeps the
    # output independent of the locale default.
    with open(
        os.path.join(image_folder, "timestamps.csv"), "w", newline="", encoding="utf-8"
    ) as f:
        writer = csv.writer(f)
        writer.writerow(["image", "timestamp"])
        writer.writerows(out_data)

In [8]:
# Sanity check: number of rows collected for the last folder processed
# (`out_data` is reset at the start of each folder iteration).
len(out_data)

0

In [4]:
# For every folder: OCR all *.jpg files in parallel and write timestamps.csv
# into the folder's *parent* directory.
for image_folder in tqdm(image_folders, desc = "Folders"):
    # glob's ordering depends on the file system; sort so the CSV rows come
    # out in a stable, filename order on every run.
    images = sorted(glob.glob(os.path.join(image_folder, "*.jpg")))
    data = get_timestamps(images)
    # newline="" is what the csv module requires of its file object (otherwise
    # extra blank lines can appear on some platforms).
    with open(
        os.path.join(os.path.dirname(image_folder), "timestamps.csv"),
        "w",
        newline="",
        encoding='utf-8',
    ) as f:
        writer = csv.writer(f)
        writer.writerow(["Filename", "Timestamp"])
        writer.writerows(data)

100%|██████████| 40/40 [00:01<00:00, 26.37it/s]
Folders: 100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


In [16]:
import numpy as np
import pandas as pd

In [20]:
# Load an existing timestamps.csv for inspection (presumably one written by
# the folder loop, which saves into the parent of the images folder — confirm).
df = pd.read_csv("/mnt/z/Square_Eyes_DP20_Data/Validation Study/Participant Data/4005/Images/Visit 1/timestamps.csv")

In [33]:
# Single frame used to spot-check the OCR steps by hand in the next cell.
image = "/mnt/z/Square_Eyes_DP20_Data/Validation Study/Participant Data/4005/Images/Visit 1/images/4005_TLC00001_00010.jpg"

In [34]:
# Manual, single-image run of the same OCR steps used in get_timestamps(),
# for spot-checking the result on `image`.
# Bottom 32-pixel strip starting at column 760.
# NOTE(review): the literals assume 1920x1080 frames — confirm.
x, y, w, h = 760, 1080 - 32, 1920 - 760, 32
im = cv2.imread(image)
if im is None:
    # cv2.imread returns None (it does not raise) for missing or undecodable
    # files; fail here with a clear message instead of an opaque TypeError on
    # the slice below.
    raise OSError(f"Could not read image: {image!r}")
# Just get the timestamp section of image
ROI = im[y : y + h, x : x + w]
# Invert colours
ROI = cv2.bitwise_not(ROI)
# Add whitespace around text (improves accuracy)
ROI = cv2.copyMakeBorder(
    ROI, 15, 15, 0, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255]
)
unclean_timestamp = pytesseract.image_to_string(ROI, lang='eng')
# Drop the "TLC130 " prefix and surrounding whitespace.
clean_timestamp = unclean_timestamp.replace("TLC130 ","").strip()

In [35]:
# Display the OCR result obtained for the spot-check image.
clean_timestamp

'2023/03/23 16:28:11'

In [32]:
# Show the rows loaded from timestamps.csv.
# NOTE(review): the values shown are 'DD/MM/YYYY HH:MM', while the OCR output
# above is 'YYYY/MM/DD HH:MM:SS'. The CSV may have been re-saved by another
# tool that reformatted the dates and dropped the seconds — confirm.
df.values

array([['4005_TLC00001_00001.jpg', '23/03/2023 16:27'],
       ['4005_TLC00001_00002.jpg', '23/03/2023 16:27'],
       ['4005_TLC00001_00003.jpg', '23/03/2023 16:27'],
       ...,
       ['4005_TLC00003_02895.jpg', '23/03/2023 18:22'],
       ['4005_TLC00003_02896.jpg', '23/03/2023 18:22'],
       ['4005_TLC00003_02897.jpg', '23/03/2023 18:23']], dtype=object)