In [2]:
import argparse
import os
import re
import threading
from abc import ABC, abstractmethod

import cv2
import gi
import numpy as np
import rosbag
import yaml
from sensor_msgs.msg import Image
from tqdm import tqdm

# The GStreamer API version must be pinned before gi.repository is imported.
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst

In [3]:
def load_config(cfg_path: str) -> dict:
    """Load a YAML configuration file.

    Args:
        cfg_path: Path of the YAML file to read.

    Returns:
        The parsed top-level document. An empty file yields an empty dict
        instead of ``None`` so callers can use ``in`` / ``[]`` on the result.
    """
    # YAML streams are UTF-8 by default; do not depend on the locale encoding.
    with open(cfg_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    # safe_load returns None when the document is empty.
    return {} if cfg is None else cfg

def print_dict(d: dict, indent: int = 0):
    """Print a (possibly nested) dict, one ``key: value`` pair per line.

    Args:
        d: Mapping to print. Values that are themselves dicts are printed
            recursively, one level deeper.
        indent: Current nesting level; each level adds two spaces of padding.
    """
    pad = '  ' * indent
    for name, entry in d.items():
        label = pad + str(name) + ": "
        if not isinstance(entry, dict):
            print(label + str(entry))
            continue
        # Nested mapping: print the key on its own line, then its content.
        print(label)
        print_dict(entry, indent + 1)

In [4]:
class MP4Input:
    """MP4 video file used as the input of an extraction pipeline.

    The file name must end with ``<10 digits>_<9 digits>.<extension>``; the two
    numbers are read as seconds and nanoseconds of the absolute start time.

    Attributes set by the constructor:
        name: Name of the input; also used as the fakesink element name.
        path: Path of the video file.
        type: Value of ``input['type']``.
        start, end: Values of ``input['start']`` / ``input['end']``, or
            ``None`` when the key is absent.
        absolute_start_time: Start time in nanoseconds parsed from ``path``.
        gstreamer_pipeline_str: gst-launch style description of the pipeline.
    """

    # "<seconds>_<nanoseconds>" stamp that directly precedes the file extension.
    _TIMESTAMP_RE = re.compile(r'(\d{10})_(\d{9})(?=\.\w+$)')

    def __init__(self, name: str, input: dict):
        self.name = name
        self.path = input['path']
        self.type = input['type']
        # Always define both bounds. A truthiness test would leave the
        # attribute undefined for a legitimate value of 0 and would raise
        # KeyError when the key is missing from the config.
        self.start = input.get('start')
        self.end = input.get('end')

        self.setup()

    def setup(self):
        """Parse the start time and build the GStreamer pipeline description."""
        self.parse_start_time()
        # NOTE(review): the path is inserted unquoted, so a path containing
        # spaces would presumably not survive Gst.parse_launch -- confirm.
        self.gstreamer_pipeline_str = (
            f"filesrc location={self.path} ! decodebin ! nvvideoconvert ! "
            f"video/x-raw,format=RGBA ! fakesink name={self.name}"
        )

    def parse_start_time(self):
        """Parse the absolute start time in nanoseconds from the file name.

        Raises:
            AssertionError: If ``self.path`` does not end with the expected
                ``<seconds>_<nanoseconds>.<extension>`` pattern.
        """
        match = self._TIMESTAMP_RE.search(self.path)
        assert match is not None, f'{self.path} does not contain the proper timestamp sub-string.'
        seconds, nanoseconds = match.groups()
        self.absolute_start_time = int(seconds) * 1_000_000_000 + int(nanoseconds)

    def loop(self):
        """Placeholder; currently does nothing."""
        pass

class Pipeline(threading.Thread):
    """Thread that launches a GStreamer pipeline and runs a GLib main loop.

    Args:
        name: Name of the pipeline; used as the thread name.
        pipeline: Pipeline description handed to ``Gst.parse_launch`` in
            :meth:`run`.
            NOTE(review): annotated as ``dict`` but ``Gst.parse_launch`` is
            given this object directly -- confirm what callers pass.
    """

    def __init__(self, name: str, pipeline: dict):
        # Thread.__init__ must run before any Thread attribute (such as
        # ``name``) is assigned; otherwise the assignment raises.
        super().__init__(name=name)
        # Launch description until run() replaces it with the Gst element.
        self.pipeline = pipeline
        # Created in run(); None while the thread has not started.
        self.loop = None

    def run(self):
        """Start the pipeline and block in a GLib main loop until it quits.

        NOTE(review): the extraction cell in this file calls ``Gst.init(None)``
        before ``Gst.parse_launch``; presumably the same must have happened
        before this thread is started -- confirm.
        """
        print(f"Starting {self.name} pipeline")
        self.pipeline = Gst.parse_launch(self.pipeline)
        self.pipeline.set_state(Gst.State.PLAYING)
        self.loop = GLib.MainLoop()
        try:
            self.loop.run()
        finally:
            # Release the pipeline once the loop stops or fails.
            self.pipeline.set_state(Gst.State.NULL)

In [5]:
# Extraction config for this run; `cfg` is read by the cells below.
cfg_path = "./config/wildfire/subcanopy/extract.yaml"
cfg = load_config(cfg_path)

In [6]:
# Sanity check on camera_0's input path: show the (root, extension) split ...
print(
    os.path.splitext(cfg["camera_0"]["input"]["path"])
)
# ... and the [seconds, nanoseconds] stamp found just before the extension.
print(
    re.search(
        r'(\d{10}_\d{9})(?=\.\w+$)',
        cfg["camera_0"]["input"]["path"],
    )[1].split('_')
)

('/root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/camera_0_1677761985_629121776', '.mp4')
['1677761985', '629121776']


In [7]:
# Supported file types and the extension a path of that type must carry.
file_type_to_extension = {
    "mp4": ".mp4",
    "rosbag": ".bag",
    "directory": "",
}

def verify_file(path: str, type: str) -> bool:
    """Check that ``path`` exists and matches the declared file ``type``.

    A message describing the first failed check is printed.

    Args:
        path: File or directory path to verify.
        type: One of the keys of ``file_type_to_extension``.

    Returns:
        True when all checks pass, False otherwise.
    """
    # verify the file/directory exists
    if not os.path.exists(path):
        print(f"file {path} does not exist")
        return False

    # verify the file type is supported
    if type not in file_type_to_extension:
        print(f"file type {type} is not supported")
        return False

    if type == "directory":
        # A directory is validated by what it is, not by how it is named, so
        # a directory whose name contains a dot (e.g. "run.1") is accepted.
        if not os.path.isdir(path):
            print(f"{path} is not a directory")
            return False
    elif os.path.splitext(path)[-1] != file_type_to_extension[type]:
        # Any other type must carry the extension registered for it.
        print(f"file {path} is not a {type} file")
        return False

    return True


def trace_config(cfg: dict) -> bool:
    """Validate the loaded config, create output directories and print it.

    For every pipeline listed under ``cfg["pipelines"]`` the input file is
    checked with ``verify_file`` and, for each output, the output type is
    checked and a ``"directory"`` entry is added to the output's dict (the
    directory is created if it does not exist). ``cfg`` is modified in place.

    Args:
        cfg: Config with a ``"pipelines"`` list and one top-level entry per
            pipeline name, each holding ``"input"`` and ``"output"`` mappings.

    Returns:
        True when every pipeline is valid, False at the first failed check
        (a message is printed describing it).
    """
    # verify that the config file contains "pipelines"
    if not cfg or not cfg.get("pipelines"):
        print("pipeline is not specified in config file")
        return False

    print('Loaded config:')
    print("pipelines: ", end="")
    print(cfg["pipelines"])

    for pipeline_name in cfg["pipelines"]:
        if pipeline_name not in cfg:
            print(f"pipeline {pipeline_name} is not specified in config file")
            return False

        pipeline = cfg[pipeline_name]

        # verify input
        if not verify_file(path=pipeline["input"]["path"], type=pipeline["input"]["type"]):
            print(f"input file {pipeline['input']['path']} is not valid")
            return False

        # make directories for output and verify type
        for index, output in pipeline["output"].items():
            # verify the file type is supported; output keys may be integers
            # in the YAML, so format them rather than concatenating strings
            if output["type"] not in file_type_to_extension:
                print(f"output {index}'s file type {output['type']} is not supported")
                return False

            if output["type"] == "directory":
                # The path itself is the directory. join(path, "") appends a
                # trailing separator when missing so dirname keeps the last
                # component instead of returning its parent.
                output["directory"] = os.path.dirname(os.path.join(output["path"], ""))
            else:
                output["directory"] = os.path.dirname(output["path"])

            # A bare file name has no directory part; makedirs("") would raise.
            if output["directory"]:
                os.makedirs(output["directory"], exist_ok=True)

        print(pipeline_name + ":")
        print_dict(pipeline, indent=1)

    return True

In [10]:
# Validate and print the loaded config; the boolean result is the cell output.
trace_config(cfg)

Loaded config:
pipelines: ['camera_0', 'camera_1', 'camera_2', 'camera_3']
camera_0:
  input: 
    path: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/camera_0_1677761985_629121776.mp4
    type: mp4
    start: 100
    end: 500
  output: 
    1: 
      path: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/camera_0/
      type: directory
      width: 612
      height: 514
      directory: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/camera_0
    2: 
      path: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/camera_0.bag
      type: rosbag
      width: 612
      height: 514
      topic: /camera_0
      frame_id: camera_0/optical_frame
      directory: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1
camera_1:
  input: 
    path: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/camera_1_1677761985_962133776.mp4
    type: mp4
    start: 100
    end: 500
  output: 
    1: 
      path: /root/data/wildfire/subcanopy/2023-04-27_FIRE-SGL-228/1/

True

In [None]:
# Command-line interface: one optional positional argument, "config".
parser = argparse.ArgumentParser()
# nargs="?" makes the positional optional so that `default` actually applies;
# without it argparse always requires the argument and ignores the default.
parser.add_argument("config", type=str, nargs="?", default="")

# NOTE(review): parse_args() reads sys.argv, which in a notebook kernel holds
# the kernel's own arguments -- confirm this cell is only meant for script use.
args = parser.parse_args()

In [11]:
# Module-level state shared by the frame probe and the extraction loop:
# the minimum spacing between saved frames and the latest buffer timestamp.
anticipated_frame_duration = rel_time = 0

def gstreamer_pipeline(loc):
    """Return the pipeline description that decodes the file at ``loc``.

    The description chains filesrc, decodebin, nvvideoconvert, an RGBA caps
    filter and a fakesink named ``s``. Whitespace (including the leading
    newline and the indentation) is part of the returned string.
    """
    template = (
        "\n"
        "        filesrc location={} !\n"
        "        decodebin !\n"
        "        nvvideoconvert !\n"
        "        video/x-raw,format=RGBA !\n"
        "        fakesink name=s\n"
        "        "
    )
    return template.format(loc)

def get_sec_and_nsec_parts_from_filename(fn):
    '''
    Extract the timestamp embedded at the very end of fn.

    fn must end with "_<seconds>_<nanoseconds>", where <seconds> is exactly
    10 digits and <nanoseconds> is one or more digits (no file extension).

    Returns a two-element list [seconds, nanoseconds] holding the raw digit
    strings. An AssertionError is raised when fn does not match.
    '''
    found = re.search(r'_(\d{10})_(\d+)$', fn)
    assert found is not None, f'{fn} does not contain the proper timestamp sub-string. '
    return [found.group(1), found.group(2)]

def on_frame_probe(pad, info):
    """Buffer probe callback: save the current frame as a PNG when selected.

    A frame is written when it falls inside the ``cfg['start']``..``cfg['stop']``
    window (or time-window extraction is disabled) and at least
    ``anticipated_frame_duration`` seconds have passed since the last saved
    frame. The file name is the absolute timestamp ``start_time + buf.pts``.

    Reads the globals ``cfg``, ``start_time`` and ``anticipated_frame_duration``
    and updates ``rel_time`` and ``prev_frame_time``.
    NOTE(review): the keys read here (start, stop, width, ...) do not appear at
    the top level of the config printed by trace_config -- confirm which config
    schema this cell expects.

    Returns:
        Always ``Gst.PadProbeReturn.OK``.
    """
    global prev_frame_time, rel_time

    # Timestamp of the last saved frame; created on the first call.
    if 'prev_frame_time' not in globals():
        prev_frame_time = 0

    keep_going = Gst.PadProbeReturn.OK

    buf = info.get_buffer()
    # Published for the extraction loop, which stops once `stop` is passed.
    rel_time = buf.pts

    # Only consult start/stop when time-window extraction is enabled.
    if cfg['extract_specific_time'] and not (cfg['start'] <= rel_time * 1e-9 < cfg['stop']):
        return keep_going

    current_time = start_time + rel_time
    if (current_time - prev_frame_time) * 1e-9 < anticipated_frame_duration:
        return keep_going

    # Convert the buffer only for frames that are actually saved; the
    # conversion copies the whole image.
    frame = buffer_to_image(buf, pad.get_current_caps())
    if frame is None:
        # The buffer could not be mapped; skip this frame instead of
        # passing None to OpenCV.
        return keep_going

    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    frame = cv2.resize(frame, (cfg['width'], cfg['height']))
    if cfg['rotate_img']:
        frame = cv2.rotate(frame, cv2.ROTATE_180)
    cv2.imwrite(os.path.join(cfg['save_dir'], str(current_time) + '.png'), frame)
    prev_frame_time = current_time

    return keep_going

def buffer_to_image(buf, caps):
    """Copy a mapped buffer into an array and return its first three channels.

    Args:
        buf: Buffer to read; mapped for reading and unmapped before returning.
        caps: Caps whose first structure provides 'height' and 'width'.

    Returns:
        A ``(height, width, 3)`` uint8 array, or ``None`` when the buffer
        could not be mapped. The channel count of the buffer is taken from the
        global ``pixel_bytes``.
        NOTE(review): assumes rows are tightly packed -- TODO confirm.
    """
    structure = caps.get_structure(0)
    rows = structure.get_value('height')
    cols = structure.get_value('width')

    mapped_ok, mapping = buf.map(Gst.MapFlags.READ)
    if not mapped_ok:
        return None

    try:
        mapped_view = np.ndarray(
            shape=(rows, cols, pixel_bytes),
            dtype=np.uint8,
            buffer=mapping.data,
        )
        # Copy so the pixels stay valid after the buffer is unmapped.
        owned = mapped_view.copy()
        # Drop the fourth channel: RGBA -> RGB.
        return owned[:, :, :3]
    finally:
        buf.unmap(mapping)

In [None]:
# Minimum spacing in seconds between saved frames, read by on_frame_probe:
# 1 / frame_rate when a specific frame rate is requested, otherwise 0.
anticipated_frame_duration = 1 / cfg['frame_rate'] \
                             if cfg['specific_frame_rate'] \
                             else 0

# Directory that receives the extracted PNG frames.
os.makedirs(cfg['save_dir'], exist_ok=True)

# Video file name without its directory and extension.
filename = os.path.basename(cfg['video_path'])
filename = os.path.splitext(filename)[0]

# Find the "<seconds>_<nanoseconds>" timestamp embedded at the end of the
# file name; an AssertionError is raised if it is missing.
sec_str, nsec_str = get_sec_and_nsec_parts_from_filename(filename)

# Absolute start time of the video in nanoseconds; on_frame_probe adds each
# buffer's pts to it to name the saved frame.
sec = int(sec_str)
nano_sec = int(nsec_str)
start_time = sec*int(1e9) + nano_sec

# Bytes per pixel read by buffer_to_image; the pipeline caps request RGBA.
pixel_bytes = 4
pipeline_str = gstreamer_pipeline(cfg['video_path'])

Gst.init(None)
pipeline = Gst.parse_launch(pipeline_str)

# Call on_frame_probe for every buffer arriving at the fakesink named 's'.
pipeline.get_by_name('s').get_static_pad('sink').add_probe(
    Gst.PadProbeType.BUFFER,
    on_frame_probe
)

print('Extracting Video ...')
pipeline.set_state(Gst.State.PLAYING)
try:
    while True:
        # Wait for an EOS or ERROR message, with a Gst.SECOND timeout;
        # msg is falsy when none arrived.
        msg = pipeline.get_bus().timed_pop_filtered(
            Gst.SECOND,
            Gst.MessageType.EOS | Gst.MessageType.ERROR
        )

        # Show something such that the user knows it is doing its work.
        print('.', end='', flush=True)

        # Stop early once the probe has seen a buffer past the requested end
        # of the time window (rel_time is updated by on_frame_probe).
        if rel_time*1e-9 > cfg['stop'] and cfg['extract_specific_time']:
            print('\nFinished extracting video until {} seconds'.format(cfg['stop']))
            break

        # EOS or ERROR received: report it and leave the loop.
        if msg:
            text = msg.get_structure().to_string() if msg.get_structure() else ''
            msg_type = Gst.message_type_get_name(msg.type)
            print(f'{msg.src.name}: [{msg_type}] {text}')
            break
finally:
    # Always shut the pipeline down, including on error or interruption.
    # The completion message is printed on every exit path.
    pipeline.set_state(Gst.State.NULL)
    print('')
    print('Extraction Complete')
