In [1]:
import os
import numpy as np
import albumentations as A
import shutil
from pathlib import Path
from natsort import natsorted
from tqdm import tqdm
from pathlib import Path
from EvEye.utils.tonic.functional.ToFrameStack import to_frame_stack_numpy
from EvEye.utils.tonic.slicers.SliceEventsAtIndices import slice_events_at_timepoints
from EvEye.utils.tonic.slicers.SliceWithTimestampAndCount import (
    slice_events_by_timestamp_and_count,
)
from EvEye.utils.cache.MemmapCacheStructedEvents import (
    load_memmap,
)
from EvEye.utils.processor.TxtProcessor import TxtProcessor 
from EvEye.utils.cache.MemmapCacheStructedEvents import *
from EvEye.utils.visualization.visualization import *

In [2]:
# --- Locations of the cached source dataset -------------------------------
root_path = Path("/mnt/data2T/junyuan/Datasets/datasets/MemmapDavisEyeEllipseDataset")
data_path = root_path.joinpath("cached_data")
ellipse_path = root_path.joinpath("cached_ellipse")

# --- Slicing parameters ----------------------------------------------------
# NOTE(review): time_window is presumably in the same unit as the event
# timestamps (likely microseconds) — confirm against the recordings.
time_window = 10000
events_count = 1000

# --- Frame rendering parameters ---------------------------------------------
sensor_size = (346, 260, 2)
events_interpolation = "causal_linear"

# --- Split sizes and number of samples stored per events batch --------------
num_train_frames, num_val_frames = 20000, 5000
batch_size = 5000

In [3]:
def load_events(file_id):
    """Return the events of recording ``file_id`` from the cache under ``data_path``.

    Recordings are addressed 50 per batch: the batch number is
    ``file_id // 50`` and the position inside that batch is ``file_id % 50``.
    The per-batch indices file supplies the ``(start, end)`` bounds of the
    recording inside the batch memmap.
    """
    batch_id, event_id = divmod(file_id, 50)

    batch_events = load_memmap(
        data_path / f"events_batch_{batch_id}.memmap",
        data_path / f"events_batch_info_{batch_id}.txt",
    )
    bounds = np.load(data_path / f"events_indices_{batch_id}.npy")
    start_index, end_index = bounds[event_id]

    return batch_events[start_index:end_index]

def load_events_from_txt(txt_path):
    """Load events from ``txt_path`` through ``TxtProcessor``."""
    processor = TxtProcessor(txt_path)
    return processor.load_events_from_txt()

def load_event_segment(index, data_base_path, batch_size=5000):
    """Return the event segment stored for sample ``index``.

    Args:
        index: Global sample index within the cached dataset.
        data_base_path: Directory holding ``events_batch_*.memmap`` and the
            matching info/indices files.
        batch_size: Number of samples stored per batch; used to translate
            ``index`` into a batch number and a position inside that batch.

    Returns:
        The slice of the batch memmap delimited by the sample's
        ``(start, end)`` entry in the batch indices file.
    """
    base_dir = Path(data_base_path)
    batch_id, event_id = divmod(index, batch_size)

    batch_events = load_memmap(
        base_dir / f"events_batch_{batch_id}.memmap",
        base_dir / f"events_batch_info_{batch_id}.txt",
    )
    bounds = np.load(base_dir / f"events_indices_{batch_id}.npy")
    start_index, end_index = bounds[event_id]

    return batch_events[start_index:end_index]


def load_ellipse(index, ellipse_base_path):
    """Return the ellipse label at position ``index`` of ellipse batch 0.

    Only ``ellipses_batch_0.memmap`` (with its info file) under
    ``ellipse_base_path`` is read.
    """
    base_dir = Path(ellipse_base_path)
    all_ellipses = load_memmap(
        base_dir / "ellipses_batch_0.memmap",
        base_dir / "ellipses_batch_info_0.txt",
    )

    return all_ellipses[index]


def get_nums(ellipse_path):
    """Count the labelled frames of every cached recording.

    Returns:
        A tuple ``(per_recording_counts, total_count)`` where the first item
        lists the number of ellipses of each recording, in the order returned
        by ``load_cached_structed_ellipses``.
    """
    per_recording_counts = [
        len(recording) for recording in load_cached_structed_ellipses(ellipse_path)
    ]

    return per_recording_counts, sum(per_recording_counts)


def get_index(file_lens, index):
    """Translate a global frame index into ``(file_id, sample_id)``.

    Args:
        file_lens: Number of frames in each file, in file order.
        index: Global, zero-based frame index over the concatenation of all
            files.

    Returns:
        ``(file_id, sample_id)``: the position of the file that contains the
        frame and the frame's offset inside that file. Files of length zero
        are skipped.

    Raises:
        IndexError: If ``index`` is negative or not smaller than the total
            number of frames. Without this check the function would return a
            ``file_id`` one past the last file (or a negative ``sample_id``)
            and the failure would only surface later, far from its cause.
    """
    file_lens_cumsum = np.cumsum(np.array(file_lens))
    total_frames = file_lens_cumsum[-1] if file_lens_cumsum.size else 0
    if index < 0 or index >= total_frames:
        raise IndexError(
            f"frame index {index} is out of range for {total_frames} frames"
        )

    # side="right" makes an index equal to a cumulative boundary fall into the
    # NEXT file, which is what a zero-based index requires.
    file_id = np.searchsorted(file_lens_cumsum, index, side="right")
    sample_id = index - file_lens_cumsum[file_id - 1] if file_id > 0 else index

    return file_id, sample_id


def load_cached_structed_ellipses(ellipses_path):
    """Load every cached ellipse recording found under ``ellipses_path``.

    The directory is scanned for ``ellipses_batch_*.memmap``,
    ``ellipses_batch_info_*.txt`` and ``ellipses_indices_*.npy``; the three
    naturally-sorted listings are walked in lockstep. Each row of an indices
    file is a ``(start, end)`` pair delimiting one recording inside the
    corresponding batch.

    Returns:
        A flat list with one ellipse array slice per recording, across all
        batches.
    """
    base_dir = Path(ellipses_path)
    memmap_files = natsorted(base_dir.glob("ellipses_batch_*.memmap"))
    info_files = natsorted(base_dir.glob("ellipses_batch_info_*.txt"))
    index_files = natsorted(base_dir.glob("ellipses_indices_*.npy"))

    recordings = []
    for memmap_file, info_file, index_file in zip(
        memmap_files, info_files, index_files
    ):
        batch = load_memmap(memmap_file, info_file)
        bounds = np.load(index_file)

        for row in range(bounds.shape[0]):
            start_index, end_index = bounds[row]
            recordings.append(batch[start_index:end_index])

    return recordings

In [4]:
def get_fixed_time_dataset(
    split, time_window, num_train_frames, num_val_frames, ellipse_path, output_base_path
):
    """Build one split of the fixed-time-window dataset and cache it as memmaps.

    For every labelled frame of the split, the events between
    ``t - time_window`` and the label timestamp ``t`` are selected with
    ``slice_events_at_timepoints``. Event segments are written in batches of
    the notebook-level ``batch_size`` under
    ``<output_base_path>/<split>/cached_data``; all labels of the split are
    written as a single batch under ``<output_base_path>/<split>/cached_ellipse``.

    Args:
        split: ``"train"`` (frames ``[0, num_train_frames)``) or ``"val"``
            (the following ``num_val_frames`` frames).
        time_window: Length of the event window that ends at each label
            timestamp.
        num_train_frames: Number of frames in the training split.
        num_val_frames: Number of frames in the validation split.
        ellipse_path: Directory with the cached source ellipses.
        output_base_path: ``Path`` of the dataset root to create.

    Raises:
        ValueError: If ``split`` is neither ``"train"`` nor ``"val"``.
    """
    if split == "train":
        first_frame, last_frame = 0, num_train_frames
    elif split == "val":
        first_frame, last_frame = num_train_frames, num_train_frames + num_val_frames
    else:
        raise ValueError(f"split must be 'train' or 'val', got {split!r}")

    output_data_path = output_base_path / split / "cached_data"
    output_ellipse_path = output_base_path / split / "cached_ellipse"
    os.makedirs(output_data_path, exist_ok=True)
    os.makedirs(output_ellipse_path, exist_ok=True)

    def _write_event_batch(segments, batch_id):
        # Persist one batch of event segments plus its (start, end) index table.
        events_merged = merge_structed_arrays(segments)
        event_indices = get_indices(segments)
        create_memmap(
            events_merged,
            f"{output_data_path}/events_batch_{batch_id}.memmap",
            f"{output_data_path}/events_batch_info_{batch_id}.txt",
        )
        np.save(f"{output_data_path}/events_indices_{batch_id}.npy", event_indices)

    # Load the labels once: they do not change between iterations, and the
    # per-recording frame counts can be derived from the same list.
    ellipses_list = load_cached_structed_ellipses(ellipse_path)
    num_frames_list = [len(ellipses) for ellipses in ellipses_list]

    event_segment_list = []
    ellipse_segment_list = []
    batch_counter = 0
    for index in tqdm(range(first_frame, last_frame)):
        file_id, sample_id = get_index(num_frames_list, index)
        events = load_events(file_id)
        ellipse = ellipses_list[file_id][sample_id]
        end_event_time = ellipse["t"]
        start_event_time = end_event_time - time_window
        event_segment = slice_events_at_timepoints(
            events, start_event_time, end_event_time
        )
        event_segment_list.append(event_segment)
        ellipse_segment_list.append(ellipse)

        # Flush on a full batch, counted relative to the split, so every batch
        # except possibly the last holds exactly batch_size samples (readers
        # locate a sample with index // batch_size and index % batch_size).
        if len(event_segment_list) == batch_size:
            _write_event_batch(event_segment_list, batch_counter)
            event_segment_list = []
            batch_counter += 1

    # Write the trailing partial batch instead of silently dropping it.
    if event_segment_list:
        _write_event_batch(event_segment_list, batch_counter)

    ellipses_merged = merge_structed_arrays(ellipse_segment_list)
    ellipses_indices = get_indices(ellipse_segment_list)
    create_memmap(
        ellipses_merged,
        f"{output_ellipse_path}/ellipses_batch_0.memmap",
        f"{output_ellipse_path}/ellipses_batch_info_0.txt",
    )
    np.save(
        f"{output_ellipse_path}/ellipses_indices_0.npy",
        ellipses_indices,
    )

In [5]:
"""For fixed time dataset"""

# The builder calls below are disabled; uncomment them to (re)generate the
# fixed-time dataset for both splits.
# output_base_path = Path("/mnt/data2T/junyuan/Datasets/FixedTime10000Dataset")
# get_fixed_time_dataset(
#     "train", 10000, num_train_frames, num_val_frames, ellipse_path, output_base_path
# )
# get_fixed_time_dataset(
#     "val", 10000, num_train_frames, num_val_frames, ellipse_path, output_base_path
# )

'For fixed time dataset'

In [6]:
def get_fixed_count_dataset(
    split,
    events_count,
    num_train_frames,
    num_val_frames,
    ellipse_path,
    output_base_path,
):
    """Build one split of the fixed-event-count dataset and cache it as memmaps.

    For every labelled frame of the split, an event segment is selected with
    ``slice_events_by_timestamp_and_count`` from the label timestamp and
    ``events_count``. Event segments are written in batches of the
    notebook-level ``batch_size`` under
    ``<output_base_path>/<split>/cached_data``; all labels of the split are
    written as a single batch under ``<output_base_path>/<split>/cached_ellipse``.

    Args:
        split: ``"train"`` (frames ``[0, num_train_frames)``) or ``"val"``
            (the following ``num_val_frames`` frames).
        events_count: Event count handed to the slicer for each sample.
        num_train_frames: Number of frames in the training split.
        num_val_frames: Number of frames in the validation split.
        ellipse_path: Directory with the cached source ellipses.
        output_base_path: ``Path`` of the dataset root to create.

    Raises:
        ValueError: If ``split`` is neither ``"train"`` nor ``"val"``.
    """
    if split == "train":
        first_frame, last_frame = 0, num_train_frames
    elif split == "val":
        first_frame, last_frame = num_train_frames, num_train_frames + num_val_frames
    else:
        raise ValueError(f"split must be 'train' or 'val', got {split!r}")

    output_data_path = output_base_path / split / "cached_data"
    output_ellipse_path = output_base_path / split / "cached_ellipse"
    os.makedirs(output_data_path, exist_ok=True)
    os.makedirs(output_ellipse_path, exist_ok=True)

    def _write_event_batch(segments, batch_id):
        # Persist one batch of event segments plus its (start, end) index table.
        events_merged = merge_structed_arrays(segments)
        event_indices = get_indices(segments)
        create_memmap(
            events_merged,
            f"{output_data_path}/events_batch_{batch_id}.memmap",
            f"{output_data_path}/events_batch_info_{batch_id}.txt",
        )
        np.save(f"{output_data_path}/events_indices_{batch_id}.npy", event_indices)

    # Load the labels once: they do not change between iterations, and the
    # per-recording frame counts can be derived from the same list.
    ellipses_list = load_cached_structed_ellipses(ellipse_path)
    num_frames_list = [len(ellipses) for ellipses in ellipses_list]

    event_segment_list = []
    ellipse_segment_list = []
    batch_counter = 0
    for index in tqdm(range(first_frame, last_frame)):
        file_id, sample_id = get_index(num_frames_list, index)
        events = load_events(file_id)
        ellipse = ellipses_list[file_id][sample_id]
        end_event_time = ellipse["t"]
        event_segment = slice_events_by_timestamp_and_count(
            events, end_event_time, events_count
        )
        event_segment_list.append(event_segment)
        ellipse_segment_list.append(ellipse)

        # Flush on a full batch, counted relative to the split, so every batch
        # except possibly the last holds exactly batch_size samples (readers
        # locate a sample with index // batch_size and index % batch_size).
        if len(event_segment_list) == batch_size:
            _write_event_batch(event_segment_list, batch_counter)
            event_segment_list = []
            batch_counter += 1

    # Write the trailing partial batch instead of silently dropping it.
    if event_segment_list:
        _write_event_batch(event_segment_list, batch_counter)

    ellipses_merged = merge_structed_arrays(ellipse_segment_list)
    ellipses_indices = get_indices(ellipse_segment_list)
    create_memmap(
        ellipses_merged,
        f"{output_ellipse_path}/ellipses_batch_0.memmap",
        f"{output_ellipse_path}/ellipses_batch_info_0.txt",
    )
    np.save(
        f"{output_ellipse_path}/ellipses_indices_0.npy",
        ellipses_indices,
    )

In [7]:
"""For fixed count dataset"""

# The builder calls below are disabled; uncomment them to (re)generate the
# fixed-count dataset for both splits.
# output_base_path = Path("/mnt/data2T/junyuan/Datasets/FixedCount1000Dataset")
# get_fixed_count_dataset(
#     "train", events_count, num_train_frames, num_val_frames, ellipse_path, output_base_path
# )
# get_fixed_count_dataset(
#     "val", events_count, num_train_frames, num_val_frames, ellipse_path, output_base_path
# )

'For fixed count dataset'

In [11]:
def get_fixed_count_dataset_from_txt(
    split,
    events_count,
    events_path,
    ellipse_path,
    output_base_path,
    train_ratio=0.8,
):
    """Build one split of a fixed-event-count dataset from text files.

    Events and ellipse labels are read with ``TxtProcessor``. The labels are
    split in file order: the first ``int(len(ellipses) * train_ratio)`` frames
    form the training split and the rest the validation split. For every
    frame of the requested split an event segment is selected with
    ``slice_events_by_timestamp_and_count`` from the label timestamp and
    ``events_count``. All segments of the split are written as a single
    events batch (batch 0) under ``<output_base_path>/<split>/cached_data``,
    and the labels as a single batch under ``.../cached_ellipse``.

    Args:
        split: ``"train"`` or ``"val"``.
        events_count: Event count handed to the slicer for each sample.
        events_path: Text file with the events.
        ellipse_path: Text file with the ellipse labels.
        output_base_path: ``Path`` of the dataset root to create.
        train_ratio: Fraction of the frames assigned to the training split.

    Raises:
        ValueError: If ``split`` is neither ``"train"`` nor ``"val"``.
    """
    if split not in ("train", "val"):
        raise ValueError(f"split must be 'train' or 'val', got {split!r}")

    output_data_path = output_base_path / split / "cached_data"
    output_ellipse_path = output_base_path / split / "cached_ellipse"
    os.makedirs(output_data_path, exist_ok=True)
    os.makedirs(output_ellipse_path, exist_ok=True)

    events = TxtProcessor(events_path).load_events_from_txt()
    ellipses = TxtProcessor(ellipse_path).load_ellipses_from_txt()
    total_frames = len(ellipses)
    num_train_frames = int(total_frames * train_ratio)

    if split == "train":
        first_frame, last_frame = 0, num_train_frames
    else:
        first_frame, last_frame = num_train_frames, total_frames

    event_segment_list = []
    ellipse_segment_list = []
    for index in tqdm(range(first_frame, last_frame)):
        ellipse = ellipses[index]
        end_event_time = ellipse["t"]
        event_segment = slice_events_by_timestamp_and_count(
            events, end_event_time, events_count
        )
        event_segment_list.append(event_segment)
        ellipse_segment_list.append(ellipse)

    # The whole split is stored as one events batch, numbered 0.
    events_merged = merge_structed_arrays(event_segment_list)
    event_indices = get_indices(event_segment_list)
    create_memmap(
        events_merged,
        f"{output_data_path}/events_batch_0.memmap",
        f"{output_data_path}/events_batch_info_0.txt",
    )
    np.save(f"{output_data_path}/events_indices_0.npy", event_indices)

    ellipses_merged = merge_structed_arrays(ellipse_segment_list)
    ellipses_indices = get_indices(ellipse_segment_list)
    create_memmap(
        ellipses_merged,
        f"{output_ellipse_path}/ellipses_batch_0.memmap",
        f"{output_ellipse_path}/ellipses_batch_info_0.txt",
    )
    np.save(
        f"{output_ellipse_path}/ellipses_indices_0.npy",
        ellipses_indices,
    )

In [12]:
# Build the fixed-count (5000 events) train/val splits of the Dean dataset
# from its text files.
#
# The paths use dataset-specific names on purpose: the notebook-level
# `ellipse_path` (cached-ellipse directory from the config cell) is consumed
# by later cells, so it must not be rebound to a .txt file here.
dean_events_path = Path("/mnt/data2T/junyuan/eye-tracking/DeanDataset/events.txt")
dean_ellipse_path = Path("/mnt/data2T/junyuan/eye-tracking/DeanDataset/ellipses.txt")
dean_output_base_path = Path("/mnt/data2T/junyuan/eye-tracking/DeanDataset")


get_fixed_count_dataset_from_txt(
    "train", 5000, dean_events_path, dean_ellipse_path, dean_output_base_path
)
get_fixed_count_dataset_from_txt(
    "val", 5000, dean_events_path, dean_ellipse_path, dean_output_base_path
)

100%|██████████| 824/824 [00:00<00:00, 49834.27it/s]
Merging arrays...: 100%|██████████| 824/824 [00:00<00:00, 5045.03it/s]
Getting indices...: 824it [00:00, 375296.61it/s]
Merging arrays...: 100%|██████████| 824/824 [00:00<00:00, 450131.09it/s]
Getting indices...: 824it [00:00, 328496.01it/s]
100%|██████████| 206/206 [00:00<00:00, 72485.46it/s]
Merging arrays...: 100%|██████████| 206/206 [00:00<00:00, 4488.29it/s]
Getting indices...: 206it [00:00, 290039.15it/s]
Merging arrays...: 100%|██████████| 206/206 [00:00<00:00, 288104.91it/s]
Getting indices...: 206it [00:00, 302424.44it/s]


In [None]:
def get_event_unet_dataset(
    split,
    events_count,
    num_train_frames,
    num_val_frames,
    ellipse_path,
    output_base_path,
):
    """Render one split of the event-frame / ellipse-mask image dataset.

    For every labelled frame of the split, an event segment is selected with
    ``slice_events_by_timestamp_and_count``, turned into a frame with
    ``to_frame_stack_numpy``, visualized and saved as
    ``<output_base_path>/<split>/data/<i>.png``. The matching label is drawn
    as a filled white ellipse on a black 260x346 canvas and saved as
    ``<output_base_path>/<split>/label/<i>.png``. ``<i>`` is the zero-padded
    frame number counted from the start of the split.

    Args:
        split: ``"train"`` (frames ``[0, num_train_frames)``) or ``"val"``
            (the following ``num_val_frames`` frames).
        events_count: Event count handed to the slicer for each sample.
        num_train_frames: Number of frames in the training split.
        num_val_frames: Number of frames in the validation split.
        ellipse_path: Directory with the cached source ellipses.
        output_base_path: ``Path`` of the dataset root to create.

    Raises:
        ValueError: If ``split`` is neither ``"train"`` nor ``"val"``.
    """
    if split == "train":
        first_frame, last_frame = 0, num_train_frames
    elif split == "val":
        first_frame, last_frame = num_train_frames, num_train_frames + num_val_frames
    else:
        raise ValueError(f"split must be 'train' or 'val', got {split!r}")

    output_data_path = output_base_path / split / "data"
    output_ellipse_path = output_base_path / split / "label"
    os.makedirs(output_data_path, exist_ok=True)
    os.makedirs(output_ellipse_path, exist_ok=True)

    # Load the labels once: they do not change between iterations, and the
    # per-recording frame counts can be derived from the same list.
    ellipses_list = load_cached_structed_ellipses(ellipse_path)
    num_frames_list = [len(ellipses) for ellipses in ellipses_list]

    for index in tqdm(range(first_frame, last_frame)):
        file_id, sample_id = get_index(num_frames_list, index)
        events = load_events(file_id)
        ellipse = ellipses_list[file_id][sample_id]
        end_event_time = ellipse["t"]
        event_segment = slice_events_by_timestamp_and_count(
            events, end_event_time, events_count
        )
        event_frame = to_frame_stack_numpy(
            event_segment,
            (346, 260, 2),
            1,
            "causal_linear",
            event_segment["t"][0],
            event_segment["t"][-1],
            10,
        )

        # Output files are numbered from 0 within each split; keep the loop
        # variable itself untouched.
        output_index = index - first_frame
        event_frame_vis = visualize(event_frame)
        save_image(event_frame_vis, f"{output_data_path}/{output_index:05}.png")

        # NOTE(review): cv2 is not imported explicitly in this notebook;
        # presumably it arrives through the star import of the visualization
        # module — confirm.
        ellipse_shape = convert_to_ellipse(ellipse)
        canvas = np.zeros((260, 346, 3), dtype=np.uint8)
        cv2.ellipse(canvas, ellipse_shape, (255, 255, 255), -1)
        save_image(canvas, f"{output_ellipse_path}/{output_index:05}.png")

In [None]:
"""For event unet dataset"""

# Only the output root is set here; the builder calls below are disabled.
# Uncomment them to (re)generate the event UNet dataset for both splits.
output_base_path = Path("/mnt/data2T/junyuan/Datasets/EventUNetDataset")
# get_event_unet_dataset(
#     split="train",
#     events_count=2000,
#     num_train_frames=num_train_frames,
#     num_val_frames=num_val_frames,
#     ellipse_path=ellipse_path,
#     output_base_path=output_base_path,
# )
# get_event_unet_dataset(
#     split="val",
#     events_count=2000,
#     num_train_frames=num_train_frames,
#     num_val_frames=num_val_frames,
#     ellipse_path=ellipse_path,
#     output_base_path=output_base_path,
# )

In [None]:
def list_subfolders(directory):
    """Return the immediate sub-directories of ``directory``, sorted with ``natsorted``."""
    root = Path(directory)
    folders = [entry for entry in root.iterdir() if entry.is_dir()]
    return natsorted(folders)


def find_and_copy_file(subfolders, timestamp, target_path, index):
    """Copy ``<timestamp>.png`` from the given folders to ``<target_path>/<index:05>.png``.

    Every folder in ``subfolders`` is checked; each one that contains the
    file copies it to the same destination, so when several folders match,
    the last one wins. Nothing happens when no folder contains the file.
    """
    target_dir = Path(target_path)

    for folder in subfolders:
        candidate = folder / f"{timestamp}.png"
        if not candidate.exists():
            continue
        shutil.copy(candidate, target_dir / f"{index:05}.png")


def get_rgb_unet_dataset(
    split,
    raw_path,
    num_train_frames,
    num_val_frames,
    ellipse_path,
    output_base_path,
):
    """Assemble one split of the RGB-frame / ellipse-mask image dataset.

    For every labelled frame of the split, the image named
    ``<timestamp>.png`` is looked up in the sub-folders of ``raw_path`` and
    copied to ``<output_base_path>/<split>/data/<i>.png``. The matching label
    is drawn as a filled white ellipse on a black 260x346 canvas and saved as
    ``<output_base_path>/<split>/label/<i>.png``. ``<i>`` is the zero-padded
    frame number counted from the start of the split.

    Args:
        split: ``"train"`` (frames ``[0, num_train_frames)``) or ``"val"``
            (the following ``num_val_frames`` frames).
        raw_path: Directory whose sub-folders hold the source PNG frames.
        num_train_frames: Number of frames in the training split.
        num_val_frames: Number of frames in the validation split.
        ellipse_path: Directory with the cached source ellipses.
        output_base_path: ``Path`` of the dataset root to create.

    Raises:
        ValueError: If ``split`` is neither ``"train"`` nor ``"val"``.
    """
    if split == "train":
        first_frame, last_frame = 0, num_train_frames
    elif split == "val":
        first_frame, last_frame = num_train_frames, num_train_frames + num_val_frames
    else:
        raise ValueError(f"split must be 'train' or 'val', got {split!r}")

    output_data_path = output_base_path / split / "data"
    output_ellipse_path = output_base_path / split / "label"
    os.makedirs(output_data_path, exist_ok=True)
    os.makedirs(output_ellipse_path, exist_ok=True)

    # Loop-invariant work, done once: the labels (which also give the
    # per-recording frame counts) and the listing of the source folders.
    ellipses_list = load_cached_structed_ellipses(ellipse_path)
    num_frames_list = [len(ellipses) for ellipses in ellipses_list]
    subfolders = list_subfolders(raw_path)

    for index in tqdm(range(first_frame, last_frame)):
        file_id, sample_id = get_index(num_frames_list, index)
        ellipse = ellipses_list[file_id][sample_id]
        timestamp = ellipse["t"]

        # Output files are numbered from 0 within each split.
        output_index = index - first_frame
        find_and_copy_file(subfolders, timestamp, output_data_path, output_index)

        ellipse_shape = convert_to_ellipse(ellipse)
        canvas = np.zeros((260, 346, 3), dtype=np.uint8)
        cv2.ellipse(canvas, ellipse_shape, (255, 255, 255), -1)
        save_image(canvas, f"{output_ellipse_path}/{output_index:05}.png")

In [None]:
"""For RGB UNet dataset"""

# Builds the "train" split only; the "val" call further down is disabled.
# `ellipse_path` has to be the cached-ellipse directory from the config cell,
# because the builder scans it for ellipses_batch_*.memmap files.
# NOTE(review): the source frames are looked up as <timestamp>.png; presumably
# the PNG-renaming cell has to be run on `raw_path` first — confirm.
output_base_path = Path("/mnt/data2T/junyuan/Datasets/RGBUNetDataset")
raw_path = Path('/mnt/data2T/junyuan/Datasets/datasets/DavisEyeCenterDatasetFrames')
get_rgb_unet_dataset(
    split="train",
    raw_path=raw_path,
    num_train_frames=num_train_frames,
    num_val_frames=num_val_frames,
    ellipse_path=ellipse_path,
    output_base_path=output_base_path,
)
# get_rgb_unet_dataset(
#     split="val",
#     raw_path=raw_path,
#     num_train_frames=num_train_frames,
#     num_val_frames=num_val_frames,
#     ellipse_path=ellipse_path,
#     output_base_path=output_base_path,
# )

In [None]:
from pathlib import Path
from tqdm import tqdm


def rename_png_files(directory):
    """Rename every PNG under ``directory`` to the second ``_``-separated part of its stem.

    A file ``<a>_<b>[_...].png`` becomes ``<b>.png`` in the same folder. Files
    whose stem contains no underscore are left untouched, so the function can
    be run again on an already-renamed tree.

    Args:
        directory: Root directory, searched recursively.
    """
    root_dir = Path(directory)

    # Take a snapshot of the listing before renaming anything, so files
    # renamed during the walk cannot be picked up a second time.
    png_files = list(root_dir.rglob("*.png"))

    for png_file in tqdm(png_files):
        stem_parts = png_file.stem.split("_")
        if len(stem_parts) < 2:
            # No "<prefix>_" to strip (for example already renamed): skip.
            continue

        # Keep only the second part of the stem and the original suffix.
        new_filename = stem_parts[1] + png_file.suffix
        png_file.rename(png_file.parent / new_filename)


# Call the function with the directory that holds the raw frames.
rename_png_files("/mnt/data2T/junyuan/Datasets/datasets/DavisEyeCenterDatasetFrames")

In [None]:
# Load sample 8999 (events and label) of the fixed-time training split.
index = 8999
event_segment_time = load_event_segment(
    index=index,
    data_base_path="/mnt/data2T/junyuan/Datasets/FixedTime10000Dataset/train/cached_data",
    batch_size=5000,
)
ellipse_time = load_ellipse(
    index=index,
    ellipse_base_path="/mnt/data2T/junyuan/Datasets/FixedTime10000Dataset/train/cached_ellipse",
)

In [None]:
# Load sample 8999 (events and label) of the fixed-count training split.
# NOTE(review): the fixed-count builder cell in this notebook targets
# "FixedCount1000Dataset"; confirm "FixedCount10000Dataset" is the intended
# directory here.
index = 8999
event_segment_count = load_event_segment(
    index=index,
    data_base_path="/mnt/data2T/junyuan/Datasets/FixedCount10000Dataset/train/cached_data",
    batch_size=5000,
)
ellipse_count = load_ellipse(
    index=index,
    ellipse_base_path="/mnt/data2T/junyuan/Datasets/FixedCount10000Dataset/train/cached_ellipse",
)

In [None]:
# Show the labels loaded from the two datasets side by side.
ellipse_time, ellipse_count

In [None]:
# Compare the shapes of the two event segments.
event_segment_time.shape, event_segment_count.shape

In [None]:
# Render the fixed-time segment into a frame spanning its first to last
# timestamp, then build its visualization.
segment_time_stamps = event_segment_time["t"]
event_frame = to_frame_stack_numpy(
    event_segment_time,
    (346, 260, 2),
    1,
    "causal_linear",
    segment_time_stamps[0],
    segment_time_stamps[-1],
    10,
)
event_frame_vis = visualize(event_frame)

In [None]:
# Render the fixed-count segment into a frame spanning its first to last
# timestamp, then build its visualization.
event_segment_count_frame = to_frame_stack_numpy(
    event_segment_count,
    (346, 260, 2),
    1,
    "causal_linear",
    event_segment_count['t'][0],
    event_segment_count['t'][-1],
    10,
)
# Visualize the fixed-count frame built just above (not the fixed-time
# `event_frame`, which would only duplicate `event_frame_vis`).
event_frame_count_vis = visualize(event_segment_count_frame)

In [None]:
# Compare the maximum values of both rendered frames and their visualizations.
event_frame.max(), event_frame_vis.max(), event_segment_count_frame.max(), event_frame_count_vis.max()

In [None]:
import matplotlib.pyplot as plt

# Overlay the label ellipse on the visualized frame and display it.
# NOTE(review): the ellipse comes from the fixed-time sample while the image is
# the fixed-count visualization — confirm this pairing is intended.
# NOTE(review): draw_ellipse's return value is discarded, so it presumably
# draws onto the array in place — confirm.
ellipse = convert_to_ellipse(ellipse_time)
draw_ellipse(event_frame_count_vis, ellipse)
plt.imshow(event_frame_count_vis)