In [40]:
import fog_x 
import os
from logging import getLogger
import numpy as np

class BaseLoader():
    def __init__(self, data_path):
        super(BaseLoader, self).__init__()
        self.data_dir = data_path
        self.logger = getLogger(__name__)


    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, idx):
        raise NotImplementedError

    def __iter___(self):
        raise NotImplementedError
    
class RTXLoader(BaseLoader):
    def __init__(self, data_path, split = 'train[:50]'):
        super(RTXLoader, self).__init__(data_path)
        import tensorflow_datasets as tfds

        builder = tfds.builder_from_directory(data_path)

        self.ds = builder.as_dataset(split=split)
        # https://www.determined.ai/blog/tf-dataset-the-bad-parts
        # data_source = builder.as_data_source()
        # print(data_source)


    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        return idx
    
    def __iter__(self):
        return self.ds.__iter__()


In [41]:

dataset = RTXLoader(os.path.expanduser("~/datasets/berkeley_autolab_ur5/0.1.0"), split = 'train[:5]')
# RTXLoader("gs://gresearch/robotics/berkeley_autolab_ur5/0.1.0")

I 2024-06-22 09:48:48,542 dataset_info.py:617] Load dataset info from /home/kych/datasets/berkeley_autolab_ur5/0.1.0


I 2024-06-22 09:48:48,598 reader.py:261] Creating a tf.data.Dataset reading 2 files located in folders: /home/kych/datasets/berkeley_autolab_ur5/0.1.0.
I 2024-06-22 09:48:48,646 logging_logger.py:49] Constructing tf.data.Dataset berkeley_autolab_ur5 for split train[:5], from /home/kych/datasets/berkeley_autolab_ur5/0.1.0


In [42]:

class BaseExporter():
    def __init__(self):
        super(BaseExporter, self).__init__()
        self.logger = getLogger(__name__)

    def export(self, loader: BaseLoader, output_path: str):
        raise NotImplementedError
        


In [61]:
import av
import pickle 

class MKVExporter(BaseExporter):
    def __init__(self):
        super(MKVExporter, self).__init__()

    # Function to create a frame from numpy array
    def create_frame(self, image_array, stream):
        frame = av.VideoFrame.from_ndarray(np.array(image_array), format='rgb24')
        frame.pict_type = 'NONE'
        frame.time_base = stream.time_base
        return frame
    
    # Function to create a frame from numpy array
    def create_frame_depth(self, image_array, stream):
        image_array = np.array(image_array)
        # if float, convert to uint8
        if image_array.dtype == np.float32:
            image_array = (image_array * 255).astype(np.uint8)
        # if 3 dim, convert to 2 dim
        if len(image_array.shape) == 3:
            image_array = image_array[:,:,0]
        frame = av.VideoFrame.from_ndarray(image_array, format='gray')
        frame.pict_type = 'NONE'
        frame.time_base = stream.time_base
        return frame

    def export(self, loader: BaseLoader, output_path: str):
        # Create an output container
        i = -1
        for traj_tensor in loader:
            i += 1
            trajectory = dict(traj_tensor)
            output = av.open(f'{output_path}/output_{i}.mkv', mode='w')
            # Define video streams (assuming images are 640x480 RGB)
            video_stream_1 = output.add_stream('libx264', rate=10)
            video_stream_1.width = 640
            video_stream_1.height = 480
            video_stream_1.pix_fmt = 'yuv420p'

            video_stream_2 = output.add_stream('libx264', rate=1)
            video_stream_2.width = 640
            video_stream_2.height = 480
            video_stream_2.pix_fmt = 'yuv420p'

            # Define custom data stream for vectors
            depth_stream = output.add_stream('libx264', rate=1)

            data_stream = output.add_stream('rawvideo', rate=1)

            ts = 0
            # convert step data to stream
            for step_tensor in trajectory["steps"]:
                step = dict(step_tensor)
                # non_image_data_step = dict(step_tensor)
                # non_image_data_step["observation"].pop("image")
                # non_image_data_step["observation"].pop("hand_image")
                # non_image_data_step["observation"].pop("image_with_depth")
                # non_image_data_bytes = str(non_image_data_step).encode()
                # print(step)
                obesrvation = step_tensor["observation"].copy()
                obesrvation.pop("image")
                obesrvation.pop("hand_image")
                obesrvation.pop("image_with_depth")
                non_image_data_step = step.copy()
                non_image_data_step["observation"] = obesrvation

                non_image_data_bytes = pickle.dumps(non_image_data_step)
                packet = av.Packet(non_image_data_bytes)
                packet.stream = data_stream
                packet.pts = ts
                output.mux(packet)


                image =np.array(step["observation"]["image"])
                # Create a frame from the numpy array
                frame = self.create_frame(image, video_stream_1)
                frame.pts = ts
                packet = video_stream_1.encode(frame)
                
                output.mux(packet)

                hand_image =np.array(step["observation"]["hand_image"])
                # Create a frame from the numpy array
                frame = self.create_frame(hand_image, video_stream_2)
                frame.pts = ts
                packet = video_stream_2.encode(frame)
                output.mux(packet)

                # # Create a frame from the numpy array
                frame = self.create_frame_depth(step["observation"]["image_with_depth"], depth_stream)
                # frame.pts = ts
                # Encode the frame
                packet = depth_stream.encode(frame)
                # Write the packet to the output file
                output.mux(packet)

                ts += 1



            output.close()
            print(ts)


exporter = MKVExporter()
output_path = os.path.expanduser("~") + "/fog_x/examples/dataloader/mkv_output/"
exporter.export(dataset,output_path)




2024-06-22 09:57:32.410997: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


71


2024-06-22 09:57:33.193706: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


76


2024-06-22 09:57:34.034370: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


81


2024-06-22 09:57:34.866546: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


80
123


2024-06-22 09:57:36.165707: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-22 09:57:36.169930: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [62]:
class MKVLoader(BaseLoader):
    def __init__(self, data_path):
        super(MKVLoader, self).__init__(data_path)
        self.files = os.listdir(data_path)
        self.index = 0

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        return idx
    
    def __iter__(self):
        return self

    def __next__(self):
        if self.index < len(self.files):
            result = self.files[self.index]
            self.index += 1
            return result
        else:
            raise StopIteration


In [63]:
input_container = av.open("./mkv_output/output_2.mkv")
video_stream1 = input_container.streams.video[0] 
video_stream2 = input_container.streams.video[1] 
depth_stream = input_container.streams.video[2] 
data_stream = input_container.streams[3] 

decoded_stream_1 = []
decoded_stream_2 = []
decoded_stream_depth = []
decoded_stream_data = []

pkt_counter = 0
for packet in input_container.demux(video_stream1, video_stream2, depth_stream, data_stream):
    pkt_counter += 1
    if packet.stream.index == video_stream1.index: 
        frame = packet.decode()
        print(frame)
        if frame:
            for f in frame:
                image = f.to_ndarray(format='rgb24')
                decoded_stream_1.append(image)
    elif packet.stream.index == video_stream2.index:
        frame = packet.decode()
        print(frame)
        if frame:
            for f in frame:
                image = f.to_ndarray(format='rgb24')
                decoded_stream_2.append(image)
    elif packet.stream.index == depth_stream.index:
        frame = packet.decode()
        if frame:
            for f in frame:
                image = f.to_ndarray(format='gray')
                decoded_stream_depth.append(image)
    elif packet.stream.index == data_stream.index:
        packet_in_bytes = bytes(packet)
        if packet_in_bytes:
            non_dict = pickle.loads(packet_in_bytes)
            decoded_stream_data.append(non_dict)
    else:
        print("Unknown stream")
        
input_container.close()

pkt_counter, len(decoded_stream_1), len(decoded_stream_2), len(decoded_stream_depth), len(decoded_stream_data)


[]
[]
[<av.VideoFrame #0, pts=0 yuv420p 640x480 at 0x7d4229d85070>]
[<av.VideoFrame #1, pts=0 yuv420p 640x480 at 0x7d4204986420>]
[<av.VideoFrame #2, pts=0 yuv420p 640x480 at 0x7d4229d87530>]
[<av.VideoFrame #3, pts=0 yuv420p 640x480 at 0x7d4204986500>]
[<av.VideoFrame #4, pts=0 yuv420p 640x480 at 0x7d4204986490>]
[<av.VideoFrame #5, pts=0 yuv420p 640x480 at 0x7d41ccf63ae0>]
[<av.VideoFrame #6, pts=0 yuv420p 640x480 at 0x7d41ccf611c0>]
[<av.VideoFrame #7, pts=0 yuv420p 640x480 at 0x7d420495fca0>]
[<av.VideoFrame #8, pts=0 yuv420p 640x480 at 0x7d420495f7d0>]
[<av.VideoFrame #9, pts=0 yuv420p 640x480 at 0x7d421023a490>]
[<av.VideoFrame #10, pts=0 yuv420p 640x480 at 0x7d4210239230>]
[<av.VideoFrame #11, pts=0 yuv420p 640x480 at 0x7d421023ae30>]
[<av.VideoFrame #12, pts=0 yuv420p 640x480 at 0x7d421023ac70>]
[<av.VideoFrame #13, pts=0 yuv420p 640x480 at 0x7d421003f450>]
[<av.VideoFrame #14, pts=0 yuv420p 640x480 at 0x7d421003de70>]
[<av.VideoFrame #15, pts=0 yuv420p 640x480 at 0x7d421003e50

(205, 40, 40, 40, 81)

In [7]:
list(input_container.streams)

[<av.VideoStream #0 h264, yuv420p 640x480 at 0x7d4229e588e0>,
 <av.VideoStream #1 h264, yuv420p 640x480 at 0x7d4229e58040>,
 <av.VideoStream #2 h264, yuv420p 640x480 at 0x7d4229e5abc0>,
 <av.VideoStream #3 rawvideo, yuv420p 640x480 at 0x7d4229e58fa0>]