Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions fastdeploy/input/encodings/ernie_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,12 @@ def add_processed_video(self, frames_cache, outputs, uuid, token_len=None):

def load_video(self, url, item):
from fastdeploy.input.utils.render_timestamp import render_frame_timestamp
from fastdeploy.input.utils.video import read_frames_decord, read_video_decord
from fastdeploy.input.utils.video import (
read_frames_paddlecodec,
read_video_paddlecodec,
)

reader, meta, path = read_video_decord(url, save_to_disk=False)
reader, meta, path = read_video_paddlecodec(url, save_to_disk=False)

video_frame_args = {
"fps": item.get("fps", self.fps),
Expand All @@ -189,7 +192,7 @@ def load_video(self, url, item):
}
video_frame_args = self.set_video_frame_args(video_frame_args, meta)

frames_data, _, timestamps = read_frames_decord(
frames_data, _, timestamps = read_frames_paddlecodec(
path,
reader,
meta,
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/encodings/paddleocr_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from fastdeploy.input.encodings.registry import EncodingRegistry
from fastdeploy.input.mm_model_config import PADDLEOCR_VL
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.utils.video import read_video_decord
from fastdeploy.input.utils.video import read_video_paddlecodec
from fastdeploy.input.utils.video import sample_frames_paddleocr as _sample_paddleocr
from fastdeploy.multimodal.hasher import MultimodalHasher

Expand Down Expand Up @@ -154,7 +154,7 @@ def add_processed_video(self, frames_cache, outputs, uuid, token_len=None):
outputs["vit_position_ids"].append(np.arange(numel) % numel)

def load_video(self, url, item):
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

fps = item.get("fps", self.fps)
num_frames = item.get("target_frames", self.target_frames)
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/encodings/qwen_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from fastdeploy.input.encodings.registry import EncodingRegistry
from fastdeploy.input.mm_model_config import QWEN3_VL, QWEN_VL
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.utils.video import read_video_decord
from fastdeploy.input.utils.video import read_video_paddlecodec
from fastdeploy.input.utils.video import sample_frames_qwen as _sample_qwen
from fastdeploy.multimodal.hasher import MultimodalHasher

Expand Down Expand Up @@ -152,7 +152,7 @@ def add_processed_video(self, frames_cache, outputs, uuid, token_len=None):
outputs["fps"].append(fps)

def load_video(self, url, item):
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

fps = item.get("fps", self.fps)
num_frames = item.get("target_frames", self.target_frames)
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from fastdeploy.input.utils.video import (
VideoReaderWrapper,
read_video_decord,
read_video_paddlecodec,
sample_frames,
sample_frames_paddleocr,
sample_frames_qwen,
Expand All @@ -34,7 +34,7 @@
"process_stop_token_ids",
"validate_model_path",
"VideoReaderWrapper",
"read_video_decord",
"read_video_paddlecodec",
"sample_frames",
"sample_frames_paddleocr",
"sample_frames_qwen",
Expand Down
89 changes: 64 additions & 25 deletions fastdeploy/input/utils/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared video utilities: VideoReaderWrapper, read_video_decord, sample_frames, read_frames_decord."""
"""Shared video utilities: VideoReaderWrapper, read_video_paddlecodec, sample_frames, read_frames_paddlecodec."""

import datetime
import hashlib
Expand All @@ -26,19 +26,22 @@
from typing import Optional, Union

import numpy as np
import paddle
from PIL import Image

from fastdeploy.input.image_processors.common import ceil_by_factor, floor_by_factor
from fastdeploy.utils import data_processor_logger
from fastdeploy.utils import data_processor_logger, get_logger

logger = get_logger("video_utils")

__all__ = [
"VideoReaderWrapper",
"read_video_decord",
"read_video_paddlecodec",
"sample_frames",
"sample_frames_qwen",
"sample_frames_paddleocr",
"get_frame_indices",
"read_frames_decord",
"read_frames_paddlecodec",
"EXTRACTED_FRAME_DIR",
"get_filename",
]
Expand All @@ -54,15 +57,20 @@ def _is_gif(data: bytes) -> bool:
return data[:6] in (b"GIF87a", b"GIF89a")


class VideoReaderWrapper:
"""decord.VideoReader wrapper that fixes a memory leak and adds GIF support.
class _NumpyFrame:
"""Wrapper so that frame[idx].asnumpy() keeps working with paddlecodec."""

Reference: https://github.com/dmlc/decord/issues/208
"""
def __init__(self, array):
self._array = array

def asnumpy(self):
return self._array

def __init__(self, video_path, *args, **kwargs):
import decord

class VideoReaderWrapper:
"""paddlecodec VideoDecoder wrapper with GIF support."""

def __init__(self, video_path, *args, **kwargs):
try:
# moviepy 1.0
import moviepy.editor as mp
Expand Down Expand Up @@ -101,22 +109,53 @@ def __init__(self, video_path, *args, **kwargs):
video_path = mp4_path
self.original_file = video_path # temp mp4, cleaned up in __del__

self._reader = decord.VideoReader(video_path, *args, **kwargs)
self._reader.seek(0)
with paddle.use_compat_guard(enable=True, scope={"torchcodec"}):
try:
import sys

from torchcodec.decoders import VideoDecoder

sys.modules["torchcodec"] = None

This comment was marked as outdated.

This comment was marked as outdated.

This comment was marked as outdated.

except (ImportError, RuntimeError) as e:

This comment was marked as outdated.

logger.error(
f"Failed to load 'torchcodec' backend via Paddle proxy.\n"
f" - Common Causes:\n"
f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n"
f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n"
f" - Recommended Fix Steps:\n"
f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n"
f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n"
f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n"
f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n"
f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n"
f" - Original Error: {e}"
)
raise
PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0))
self._decoder = VideoDecoder(

This comment was marked as outdated.

video_path,
seek_mode="exact",
num_ffmpeg_threads=PADDLECODEC_NUM_THREADS,
device=kwargs.get("device", "cpu"),
dimension_order="NHWC",
)

def __len__(self):
return len(self._reader)
return self._decoder.metadata.num_frames

def __getitem__(self, key):
frames = self._reader[key]
self._reader.seek(0)
return frames
if isinstance(key, (int, np.integer)):
frame = self._decoder.get_frames_at(indices=[int(key)]).data[0]

This comment was marked as outdated.

return _NumpyFrame(frame.numpy())
if isinstance(key, slice):
indices = list(range(*key.indices(len(self))))
else:
indices = list(key) if not isinstance(key, list) else key
frames = self._decoder.get_frames_at(indices=indices).data

This comment was marked as outdated.

return _NumpyFrame(frames.numpy())

def get_avg_fps(self):

This comment was marked as outdated.

return self._reader.get_avg_fps()

def seek(self, pos):
return self._reader.seek(pos)
return self._decoder.metadata.average_fps

def __del__(self):
original_file = getattr(self, "original_file", None)
Expand All @@ -128,11 +167,11 @@ def __del__(self):


# ---------------------------------------------------------------------------
# read_video_decord
# read_video_paddlecodec
# ---------------------------------------------------------------------------


def read_video_decord(video_path, save_to_disk: bool = False):
def read_video_paddlecodec(video_path, save_to_disk: bool = False):
"""Load a video file and return (video_reader, video_meta, video_path).

video_meta contains keys: "fps", "duration", "num_of_frame".
Expand Down Expand Up @@ -306,7 +345,7 @@ def get_filename(url=None):


# ---------------------------------------------------------------------------
# get_frame_indices / read_frames_decord
# get_frame_indices / read_frames_paddlecodec
# (migrated from ernie4_5_vl_processor/process_video.py)
# ---------------------------------------------------------------------------

Expand Down Expand Up @@ -376,7 +415,7 @@ def get_frame_indices(
return frame_indices


def read_frames_decord(
def read_frames_paddlecodec(
video_path,
video_reader,
video_meta,
Expand All @@ -389,7 +428,7 @@ def read_frames_decord(
frame_indices=None,
tol=10,
):
"""Read frames from a video using decord, with retry logic for corrupt frames."""
"""Read frames from a video using paddlecodec, with retry logic for corrupt frames."""
if cache_dir is None:
cache_dir = EXTRACTED_FRAME_DIR

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ xlwt
visualdl
setuptools-scm>=8
prometheus-client
decord
paddlecodec
moviepy
triton
crcmod
Expand Down
2 changes: 1 addition & 1 deletion requirements_dcu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ xlwt
visualdl
setuptools-scm>=8
prometheus-client
decord
paddlecodec
moviepy
use-triton-in-paddle
crcmod
Expand Down
2 changes: 1 addition & 1 deletion requirements_iluvatar.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ xlwt
visualdl
setuptools-scm>=8
prometheus-client
decord
paddlecodec
moviepy
triton
crcmod
Expand Down
2 changes: 1 addition & 1 deletion requirements_metaxgpu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ xlwt
visualdl
setuptools-scm>=8
prometheus-client
decord
paddlecodec
moviepy
triton
use-triton-in-paddle
Expand Down
Loading
Loading