In [1]:
import os
import random
from torch.utils.data import Dataset, DataLoader
import json
from tqdm.auto import tqdm

In [2]:
# IPython line magic: change the kernel's working directory to the path below
# (the magic echoes the new directory as cell output).
%cd /workspace/DCVC_Dynamic_Intra_Period

/workspace/DCVC_Dynamic_Intra_Period


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
def frame_name_by_idx(idx):
    """Return the frame file name for `idx`: "im", the index zero-padded to 5 characters, ".png"."""
    padded_idx = str(idx).zfill(5)
    return "im" + padded_idx + ".png"

class VideoFrameDataset:
    """Stateful sampler that stitches random frame runs ("flows") into one long sequence.

    Every call to `next()` returns the path of one frame. Frames of a flow come
    from consecutive indices inside one randomly chosen sub-folder of
    `root_dir`; when a flow is exhausted, a new flow type, length, folder and
    start index are drawn.

    Flow types:
      * "normal" - indices increase by one on every call.
      * "bounce" - indices increase up to `start + seq_len // 2` and then
        decrease again, so the peak frame is emitted exactly once.

    Args:
        root_dir: Directory whose immediate sub-directories each hold the
            frames of one video, named as produced by `frame_name_by_idx`.
        required_frames: Total number of frames this sampler emits.
        sequences_size: `(low, high)` inclusive bounds for the drawn flow
            length; the drawn value is doubled for "bounce" flows.
        transform: Stored on the instance; not used by this class.
        seq_types: Flow types to choose from.
        first_frame_idx: Index carried by the first frame file of every video
            folder. The default 0 keeps the previous numbering; pass 1 for
            folders whose first frame is im00001.png.
    """

    def __init__(self, root_dir, required_frames, sequences_size=(100, 300), transform=None,
                 seq_types=("normal", "bounce"), first_frame_idx=0):
        self.root_dir = root_dir
        # os.listdir() returns entries in arbitrary order; sorting makes a
        # seeded `random` pick the same folders on every machine.
        self.video_folders = sorted(
            path
            for path in (os.path.join(root_dir, entry) for entry in os.listdir(root_dir))
            if os.path.isdir(path)
        )
        self.required_frames = required_frames
        self.transform = transform
        # Copies, so the instance never shares a list with the caller or with
        # the parameter defaults.
        self.sequences_size = list(sequences_size)
        self.seq_types = list(seq_types)
        self.first_frame_idx = first_frame_idx

        self.emitted = 0
        self.choose_flow = True  # If False continue in the current flow
        self.seq_len = None
        self.growing = True
        self.remaining_in_flow = None
        self.seq_type = None
        self.last_frame_idx = None
        self.folder = None

    def __len__(self):
        """Total number of frames this sampler is asked to emit."""
        return self.required_frames

    def _start_flow(self):
        """Draw the type, length, folder and start index of a new flow."""
        self.seq_type = random.choice(self.seq_types)
        is_bounce = self.seq_type == "bounce"
        drawn_len = random.randint(self.sequences_size[0], self.sequences_size[1])
        # Never plan more frames than are still owed overall.
        self.seq_len = min(drawn_len * (2 if is_bounce else 1), self.required_frames - self.emitted)
        self.remaining_in_flow = self.seq_len
        self.folder = random.choice(self.video_folders)

        folder_size = sum(
            1 for entry in os.listdir(self.folder)
            if os.path.isfile(os.path.join(self.folder, entry))
        )
        # Distance between the first frame of the flow and the highest index it
        # reaches: a bounce climbs seq_len // 2 steps, a normal flow seq_len - 1.
        peak_offset = self.seq_len // 2 if is_bounce else self.seq_len - 1
        max_start_offset = folder_size - 1 - peak_offset
        if max_start_offset < 0:
            raise ValueError(
                f"{self.folder} holds {folder_size} frames, but a {self.seq_type!r} flow "
                f"of length {self.seq_len} needs {peak_offset + 1}"
            )
        self.last_frame_idx = self.first_frame_idx + random.randint(0, max_start_offset)
        self.growing = True
        # Only mark the flow as started once every draw succeeded, so a failed
        # attempt can simply be retried.
        self.choose_flow = False

    def next(self):
        """Return `(frame_path, reference_point)` for the next frame.

        `reference_point` is True only for the first frame of a new flow.

        Raises:
            StopIteration: if `required_frames` frames were already emitted.
            ValueError: if the chosen folder holds too few frames for the flow.
        """
        if self.emitted >= self.required_frames:
            raise StopIteration("all required frames have already been emitted")

        reference_point = self.choose_flow
        if self.choose_flow:
            self._start_flow()

        if self.seq_type == "bounce":
            # Turn around once the climbing half is done. Comparing with >=
            # (instead of testing remaining * 2 == seq_len) also handles flows
            # that were truncated to an odd length.
            frames_done = self.seq_len - self.remaining_in_flow
            if frames_done >= self.seq_len // 2:
                self.growing = False

        resulted_frame = os.path.join(self.folder, frame_name_by_idx(self.last_frame_idx))

        self.last_frame_idx += 1 if self.growing else -1
        self.remaining_in_flow -= 1
        self.emitted += 1

        if self.remaining_in_flow == 0:
            self.choose_flow = True

        return resulted_frame, reference_point
          

In [4]:
def produce_syntetic_dataset(frames, source_folder, root_folder, dataset_name, range_size=(100, 300), seq_types=("normal", "bounce")):
    """Create a folder of symlinked frames sampled by `VideoFrameDataset`.

    The folder `root_folder/dataset_name` is created if needed and filled with
    `frames` symlinks named `frame_name_by_idx(1)` ... `frame_name_by_idx(frames)`,
    each pointing at the frame returned by one `VideoFrameDataset.next()` call.
    The 1-based positions at which a new flow starts are written to
    `reference_points.json` in the same folder.

    Existing symlinks with the same name are replaced, so the function can be
    re-run for the same dataset; any other entry in the folder is left untouched.

    Args:
        frames: Number of frames (symlinks) to produce.
        source_folder: Root directory passed to `VideoFrameDataset`.
        root_folder: Directory under which the dataset folder is created.
        dataset_name: Name of the dataset folder.
        range_size: `(low, high)` flow-length bounds passed to `VideoFrameDataset`.
        seq_types: Flow types passed to `VideoFrameDataset`.

    Returns:
        Path of the dataset folder.

    Raises:
        FileExistsError: if a target name is occupied by something that is not
            a symlink.
    """
    folder_path = os.path.join(root_folder, dataset_name)
    os.makedirs(folder_path, exist_ok=True)

    dataset = VideoFrameDataset(source_folder, frames, list(range_size), seq_types=list(seq_types))

    ref_points = []

    for position in range(1, frames + 1):
        frame, reference_point = dataset.next()

        if reference_point:
            ref_points.append(position)

        link_path = os.path.join(folder_path, frame_name_by_idx(position))
        # A previous run may have left a link here; os.symlink refuses to overwrite.
        if os.path.islink(link_path):
            os.remove(link_path)
        # A relative link target is resolved from the link's own directory, so
        # store an absolute target to keep links valid for a relative source_folder.
        os.symlink(os.path.abspath(frame), link_path)

    with open(os.path.join(folder_path, "reference_points.json"), "w") as f:
        json.dump(ref_points, f)

    return folder_path

In [28]:
# Ten 1800-frame datasets (dataset_0 ... dataset_9) with the default flow settings.
for i in tqdm(range(10), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=1800,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic/",
        dataset_name=f"dataset_{i}",
    )

Generating datasets:   0%|          | 0/10 [00:00<?, ?it/s]

In [15]:
# Ten 600-frame datasets (short_dataset_0 ... short_dataset_9) with flow lengths drawn from [100, 200].
for i in tqdm(range(10), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=600,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic/",
        dataset_name=f"short_dataset_{i}",
        range_size=[100, 200],
    )

Generating datasets:   0%|          | 0/10 [00:00<?, ?it/s]

In [17]:
# One 1000-frame dataset (frequent_dataset_0) with flow lengths drawn from [10, 50].
for i in tqdm(range(1), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=1000,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic/",
        dataset_name=f"frequent_dataset_{i}",
        range_size=[10, 50],
    )

Generating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

In [18]:
# One 100-frame dataset (small_dataset_0) with flow lengths drawn from [10, 50].
for i in tqdm(range(1), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=100,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic/",
        dataset_name=f"small_dataset_{i}",
        range_size=[10, 50],
    )

Generating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

In [22]:
# One 400-frame dataset (straight_dataset_0) made only of "bounce" flows with a fixed drawn length of 200.
for i in tqdm(range(1), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=400,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic/",
        dataset_name=f"straight_dataset_{i}",
        range_size=[200, 200],
        seq_types=["bounce"],
    )

Generating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
# One 3000-frame dataset (predictor_dataset_0) with flow lengths drawn from [32, 256].
for i in tqdm(range(1), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=3000,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic/",
        dataset_name=f"predictor_dataset_{i}",
        range_size=[32, 256],
    )

Generating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
# Same settings as a 3000-frame [32, 256] run, but reading from media/UVG-LOW/ and writing to media/syntetic-low/.
for i in tqdm(range(1), desc="Generating datasets"):
    produce_syntetic_dataset(
        frames=3000,
        source_folder="/workspace/DCVC_Dynamic_Intra_Period/media/UVG-LOW/",
        root_folder="/workspace/DCVC_Dynamic_Intra_Period/media/syntetic-low/",
        dataset_name=f"predictor_dataset_{i}",
        range_size=[32, 256],
    )

Generating datasets:   0%|          | 0/1 [00:00<?, ?it/s]