Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions llama_cpp/llama_chat_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2835,15 +2835,12 @@ def _create_bitmap_from_bytes(self, image_bytes: bytes):

with suppress_stdout_stderr(disable=self.verbose):
# Create bitmap from buffer using helper function
bitmap = self._mtmd_cpp.mtmd_helper_bitmap_init_from_buf(
self.mtmd_ctx,
(ctypes.c_uint8 * len(image_bytes)).from_buffer(bytearray(image_bytes)),
len(image_bytes)
)
n = len(image_bytes)
buf = (ctypes.c_ubyte * n).from_buffer_copy(image_bytes) # makes a copy

if bitmap is None:
bitmap = self._mtmd_cpp.mtmd_helper_bitmap_init_from_buf(self.mtmd_ctx, buf, n)
if not bitmap:
raise ValueError("Failed to create bitmap from image bytes")

return bitmap

def __call__(
Expand Down Expand Up @@ -2965,7 +2962,6 @@ def __call__(

# Reset llama context
llama.reset()
llama._ctx.kv_cache_clear()

# Process each chunk
n_past = llama_cpp.llama_pos(0)
Expand All @@ -2978,7 +2974,7 @@ def __call__(

chunk_type = self._mtmd_cpp.mtmd_input_chunk_get_type(chunk)

if chunk_type == self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_TEXT:
if chunk_type == self._mtmd_cpp.mtmd_input_chunk_type.MTMD_INPUT_CHUNK_TYPE_TEXT:
# Handle text chunk
n_tokens_out = ctypes.c_size_t()
tokens_ptr = self._mtmd_cpp.mtmd_input_chunk_get_tokens_text(
Expand All @@ -2995,7 +2991,7 @@ def __call__(
)
llama.eval(tokens)

elif chunk_type in [self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_IMAGE, self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_AUDIO]:
elif chunk_type in [self._mtmd_cpp.mtmd_input_chunk_type.MTMD_INPUT_CHUNK_TYPE_IMAGE, self._mtmd_cpp.mtmd_input_chunk_type.MTMD_INPUT_CHUNK_TYPE_AUDIO]:
# Handle image/audio chunk using helper
chunk_n_tokens = self._mtmd_cpp.mtmd_input_chunk_get_n_tokens(chunk)

Expand Down
27 changes: 13 additions & 14 deletions llama_cpp/mtmd_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
c_uint32,
c_float,
c_void_p,
c_size_t,
c_ubyte,
POINTER,
_Pointer, # type: ignore
Structure,
Expand Down Expand Up @@ -136,14 +138,13 @@ class mtmd_context_params(Structure):
def mtmd_default_marker() -> c_char_p:
...


# MTMD_API struct mtmd_context_params mtmd_context_params_default(void);
@ctypes_function_mtmd(
"mtmd_context_params_default",
[],
mtmd_context_params_p_ctypes,
mtmd_context_params,
)
def mtmd_context_params_default() -> mtmd_context_params_p:
def mtmd_context_params_default() -> mtmd_context_params:
...


Expand Down Expand Up @@ -370,13 +371,13 @@ def mtmd_input_chunk_get_type(chunk: mtmd_input_chunk_p) -> c_int32:
@ctypes_function_mtmd(
"mtmd_input_chunk_get_tokens_text", [
mtmd_input_chunk_p_ctypes,
POINTER(c_uint),
], c_int32)
POINTER(c_size_t),
], POINTER(c_int32))
def mtmd_input_chunk_get_tokens_text(
chunk: mtmd_input_chunk_p,
n_tokens_output: c_uint,
n_tokens_output: c_size_t,
/,
) -> c_int32:
) -> POINTER(c_int32):
...

# MTMD_API const mtmd_image_tokens * mtmd_input_chunk_get_tokens_image(const mtmd_input_chunk * chunk);
Expand Down Expand Up @@ -609,13 +610,11 @@ def mtmd_helper_bitmap_init_from_file(ctx: mtmd_context_p, fname: c_char_p) -> m
# // this function is thread-safe
# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len);
@ctypes_function_mtmd(
"mtmd_helper_bitmap_init_from_buf", [mtmd_context_p_ctypes, c_char_p, c_uint], mtmd_bitmap_p_ctypes)
def mtmd_helper_bitmap_init_from_buf(
ctx: mtmd_context_p,
buf: c_char_p,
len: c_uint,
/,
) -> mtmd_bitmap_p:
"mtmd_helper_bitmap_init_from_buf",
[mtmd_context_p_ctypes, POINTER(c_ubyte), c_size_t],
mtmd_bitmap_p_ctypes
)
def mtmd_helper_bitmap_init_from_buf(ctx, buf, length):
"""
helper function to construct a mtmd_bitmap from a buffer containing a file
supported formats:
Expand Down