In [1]:
import os
import h5py
import cv2

def load_hdf5(dataset_dir, dataset_name):
    dataset_path = os.path.join(dataset_dir, dataset_name + '.hdf5')
    if not os.path.isfile(dataset_path):
        print(f'Dataset does not exist at \n{dataset_path}\n')
        raise FileNotFoundError

    with h5py.File(dataset_path, 'r') as root:
        compressed = root.attrs.get('compress', False)
        qpos = root['/observations/qpos'][()]
        qvel = root['/observations/qvel'][()]
        action = root['/action'][()]
        image_dict = dict()
        for cam_name in root[f'/observations/images/'].keys():
            image_dict[cam_name] = root[f'/observations/images/{cam_name}'][()]
        if compressed:
            compress_len = root['/compress_len'][()]

    if compressed:
        for cam_id, cam_name in enumerate(image_dict.keys()):
            # un-pad and uncompress
            padded_compressed_image_list = image_dict[cam_name]
            image_list = []
            for frame_id, padded_compressed_image in enumerate(padded_compressed_image_list): # [:1000] to save memory
                compressed_image = padded_compressed_image
                image = cv2.imdecode(compressed_image, 1)
                image_list.append(image)
            image_dict[cam_name] = image_list

    return qpos, qvel, action, image_dict


# dataset_dir = '/home/weixun/testing/avdc/datasets/hdf5_datasets/data'
# dataset_name = 'episode_20'
# qpos, qvel, action, image_dict = load_hdf5(dataset_dir, dataset_name)
# print(qpos.shape)
# print(qvel.shape)
# print(action.shape)
# print(image_dict.keys())
# print(len(image_dict['cam_high']))
# print(image_dict['cam_high'][0].shape)

In [3]:
import numpy as np

def create_video_from_np_arrays(np_arrays, video_file, target_size=None, fps=50, square=False, skip_frames=3):
    # Check if there are any arrays
    if not np_arrays:
        print("No numpy arrays provided")
        return
    if target_size is None:
        target_size = np_arrays[0].shape[:2]

    # Get size from the first array if not provided
    # height, width, _ = np_arrays[0].shape
    print("Shape Incoming: ", np_arrays[0].shape)
    slicing_tuple = None

    # if square, make the width and height the same by making the width shorter
    if square:
        img_sz = np_arrays[0].shape
        # Get the larger dimension 
        dim_to_reduce = np.argmax(img_sz)
        other = 1 if dim_to_reduce == 0 else 0

        border = (img_sz[dim_to_reduce] - img_sz[other]) // 2
        border_left = border
        border_right = border + (img_sz[dim_to_reduce] - img_sz[other]) % 2
        slice_range = slice(border_left, img_sz[dim_to_reduce] - border_right)
        slicing_tuple = tuple(slice_range if i == dim_to_reduce else slice(None) for i in range(3))

    new_lst = []
    jump = skip_frames + 1
    # Resize all arrays if needed
    for i in range(len(np_arrays)):
        if i % jump != 0:
            continue
        view_ = np_arrays[i][slicing_tuple] if slicing_tuple else np_arrays[i] 
        new_lst.append(cv2.resize(view_[:, :, ::-1], (target_size[1], target_size[0]))) # BGR to RGB and resize
        # np_arrays[i] = cv2.resize(np_arrays[i], target_size)  # Resize array to match video size
            
        # print("New Shape: ", size)
    print("Old Frame Count:", len(np_arrays) , " New Expected Frame Count, Actual: ", len(np_arrays)//jump, " , ", len(new_lst))
    print("New Shape: ", new_lst[0].shape)

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use 'XVID' for .avi files or 'mp4v' for .mp4
    video = cv2.VideoWriter(video_file, fourcc, fps, (target_size[1], target_size[0]))
    
    for array in new_lst:
        video.write(array)

    video.release()

In [4]:
import time
# dataset_dir = '/home/weixun/testing/avdc/datasets/hdf5_datasets/data'
dataset_dir='/home/weixun/testing/avdc/datasets/hdf5_datasets/transfer_cube'
dataset_name = 'episode_20'
qpos, qvel, action, image_dict = load_hdf5(dataset_dir, dataset_name)
cam = 'cam_low'

for f in os.listdir(dataset_dir):
    if not f.endswith('.hdf5'):
        continue
    dataset_name = f.split('.')[0]
    print("Processing ", dataset_name)
    start_time = time.time()
    out_dir = os.path.join(dataset_dir, f'{dataset_name}_{cam}.mp4')
    qpos, qvel, action, image_dict = load_hdf5(dataset_dir, dataset_name)
    print("Loaded hdf5, took ", time.time() - start_time)
    start_time = time.time()
    create_video_from_np_arrays(image_dict[cam], out_dir, fps=50)
    print("Processed Video", dataset_name, " took ", time.time() - start_time)
# dataset_name = "episode_20"
# print("Processing ", dataset_name)
# out_dir = os.path.join(dataset_dir, f'{dataset_name}_{cam}.mp4')
# create_video_from_np_arrays(image_dict[cam], out_dir, fps=50, target_size=(128, 128), square=True)

Processing  episode_14
Loaded hdf5, took  4.4258034229278564
Shape Incoming:  (480, 640, 3)
Old Frame Count: 1600  New Expected Frame Count, Actual:  400  ,  400
New Shape:  (480, 640, 3)
Processed Video episode_14  took  0.85148024559021
Processing  episode_25
Loaded hdf5, took  3.3004820346832275
Shape Incoming:  (480, 640, 3)
Old Frame Count: 1600  New Expected Frame Count, Actual:  400  ,  400
New Shape:  (480, 640, 3)
Processed Video episode_25  took  0.7931482791900635
Processing  episode_18
Loaded hdf5, took  3.7412238121032715
Shape Incoming:  (480, 640, 3)
Old Frame Count: 1600  New Expected Frame Count, Actual:  400  ,  400
New Shape:  (480, 640, 3)
Processed Video episode_18  took  0.8531761169433594
Processing  episode_13
Loaded hdf5, took  3.193129539489746
Shape Incoming:  (480, 640, 3)
Old Frame Count: 1600  New Expected Frame Count, Actual:  400  ,  400
New Shape:  (480, 640, 3)
Processed Video episode_13  took  0.7722103595733643
Processing  episode_5
Loaded hdf5, took

In [34]:
# Split the dataset into 3 parts:
# - Pick up orange object with right arm | pick_up_orange_object_with_right_arm
# - Hand over orange object to left arm | hand_over_orange_object_to_left_arm
# - Place orange object in the blue square on the table | place_orange_object_in_blue_square_on_table_with_left_arm
# Creating 3 directories with the respective videos
parent_dir = os.path.join(dataset_dir, 'vidgen_datasets')
rhp_dir = os.path.join(parent_dir, 'right_hand_pick')
hol_dir = os.path.join(parent_dir, 'hand_over_left')
pob_dir = os.path.join(parent_dir, 'place_on_blue')

for d in [parent_dir, rhp_dir, hol_dir, pob_dir]:
    os.makedirs(d, exist_ok=True)

In [60]:
# Spliting the videos into 3 smaller videos, split them equally.
# First 1/3 of the video is for right hand pick, next 1/3 is for hand over left, last 1/3 is for place on blue
# Process each video file
for f in os.listdir(dataset_dir):
    if not f.endswith('.mp4'):
        continue
    
    print("Processing ", f)
    cap = cv2.VideoCapture(os.path.join(dataset_dir, f))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print("Total Frames: ", total_frames)
    
    # Define VideoWriters for the three output videos
    rhp_writer = cv2.VideoWriter(os.path.join(rhp_dir, f), cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
    hol_writer = cv2.VideoWriter(os.path.join(hol_dir, f), cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
    pob_writer = cv2.VideoWriter(os.path.join(pob_dir, f), cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
    
    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        if frame_count < total_frames // 3:
            rhp_writer.write(frame)
        elif frame_count < 9 * total_frames // 12: # Adjusted ratio to allow for more frames in hand over left
            hol_writer.write(frame)
        else:
            pob_writer.write(frame)
        
        frame_count += 1
    
    # Release everything
    cap.release()
    rhp_writer.release()
    hol_writer.release()
    pob_writer.release()


Processing  episode_38_cam_low.mp4
Total Frames:  400
Processing  episode_0_cam_low.mp4
Total Frames:  400
Processing  episode_25_cam_low.mp4
Total Frames:  400
Processing  episode_9_cam_low.mp4
Total Frames:  400
Processing  episode_31_cam_low.mp4
Total Frames:  400
Processing  episode_47_cam_low.mp4
Total Frames:  400
Processing  episode_26_cam_low.mp4
Total Frames:  400
Processing  episode_34_cam_low.mp4
Total Frames:  400
Processing  episode_32_cam_low.mp4
Total Frames:  400
Processing  episode_21_cam_low.mp4
Total Frames:  400
Processing  episode_14_cam_low.mp4
Total Frames:  400
Processing  episode_2_cam_low.mp4
Total Frames:  400
Processing  episode_39_cam_low.mp4
Total Frames:  400
Processing  episode_1_cam_low.mp4
Total Frames:  400
Processing  episode_37_cam_low.mp4
Total Frames:  400
Processing  episode_19_cam_low.mp4
Total Frames:  400
Processing  episode_12_cam_low.mp4
Total Frames:  400
Processing  episode_17_cam_low.mp4
Total Frames:  400
Processing  episode_7_cam_low.mp

In [None]:
# Creating the dataset for vidgen, 

0

In [23]:
import sys
def add_to_python_path(path):
    # Get the absolute path to ensure we are checking the correct path
    absolute_path = os.path.abspath(path)
    
    # Check if the path is already in sys.path
    if absolute_path in sys.path:
        print(f"The path '{absolute_path}' is already in the Python path.")
    else:
        # If not, add it to sys.path
        sys.path.append(absolute_path)
        print(f"The path '{absolute_path}' has been added to the Python path.")
add_to_python_path("/home/weixun/testing/avdc")

The path '/home/weixun/testing/avdc' has been added to the Python path.


In [36]:
import importlib
import flowdiffusion.datasets
importlib.reload(flowdiffusion.datasets)
del SequentialDatasetNp

In [37]:
# Import the module and reload it to ensure changes are applied
from flowdiffusion.datasets import SequentialDatasetNp
from torch.utils.data import DataLoader


file_path = './datasets/bridge/numpy/bridge_data_v1/berkeley/toykitchen4/put_banana_in_pot_or_pan/val'
dataset = SequentialDatasetNp(file_path)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
print("Length of dataloader: ", len(dataloader))


Preparing dataset...
SEQLEN:  0


ValueError: num_samples should be a positive integer value, but got num_samples=0

## Kitting Task Video Separation
**Doing 6 separations:**
1. Start to Right hand first grab + dt frames (First Sharp Increase)
2. Up until Right hand first release + dt frames
3. Up until Left hand first grab + dt frames
4. Up until Left hand first release + dt frames
5. Up until Right hand last grab + dt frames
6. Last grab to end of video last release + dt frames to end of vid (If the last grab is before the last release, take the 2nd last grab)
    - Last grab and release threshold 0.02

~ Kinda like Regex

#### Small Notes:
1. Should also try to make IK model predict whether or not gripper is open or closed even if it can't infer information from image? 
    - Try making it learn and one not making it learn.
    

In [None]:
import os
import h5py
import cv2

def load_hdf5(dataset_dir, dataset_name):
    dataset_path = os.path.join(dataset_dir, dataset_name)
    if not os.path.isfile(dataset_path):
        print(f'Dataset does not exist at \n{dataset_path}\n')
        raise FileNotFoundError

    with h5py.File(dataset_path, 'r') as root:
        compressed = root.attrs.get('compress', False)
        qpos = root['/observations/qpos'][()]
        qvel = root['/observations/qvel'][()]
        action = root['/action'][()]
        image_dict = dict()
        for cam_name in root[f'/observations/images/'].keys():
            image_dict[cam_name] = root[f'/observations/images/{cam_name}'][()]
        if compressed:
            compress_len = root['/compress_len'][()]

    if compressed:
        for cam_id, cam_name in enumerate(image_dict.keys()):
            # un-pad and uncompress
            padded_compressed_image_list = image_dict[cam_name]
            image_list = []
            for frame_id, padded_compressed_image in enumerate(padded_compressed_image_list): # [:1000] to save memory
                compressed_image = padded_compressed_image
                image = cv2.imdecode(compressed_image, 1)
                image_list.append(image)
            image_dict[cam_name] = image_list

    return qpos, qvel, action, image_dict

In [None]:
# Grabbing + sharp Drop in Gripper State.

# 