Skip to content
Merged
52 changes: 36 additions & 16 deletions label_studio_ml/examples/langchain_search_agent/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
from uuid import uuid4
from typing import List, Dict, Optional, Any
from label_studio_ml.model import LabelStudioMLBase
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.callbacks.base import BaseCallbackHandler
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI


# Import langchain components - use new API (v1.0+)
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.callbacks import BaseCallbackHandler
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langchain_core.tools import Tool


from label_studio_ml.utils import match_labels

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -82,17 +86,16 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -
func=search.run,
callbacks=[search_results]
)]
llm = OpenAI(
llm = ChatOpenAI(
temperature=0,
model_name='gpt-3.5-turbo-instruct'
model="gpt-3.5-turbo"
)
agent = initialize_agent(
tools,
llm,
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
verbose=True,
max_iterations=3,
early_stopping_method="generate",

# Use new agent API (langchain 1.0+)
agent = create_agent(
model=llm,
tools=tools,
debug=True
)

labels = self.parsed_label_config[from_name]['labels']
Expand Down Expand Up @@ -121,7 +124,24 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -
text = self.preload_task_data(task, task['data'][value])
full_prompt = self.PROMPT_TEMPLATE.format(prompt=prompt, text=text)
logger.info(f'Full prompt: {full_prompt}')
llm_result = agent.run(full_prompt)
# Invoke the agent with the prompt
result = agent.invoke({"messages": [("user", full_prompt)]})
# Extract the response from the agent result
if isinstance(result, dict) and "messages" in result:
# Get the last message which should be the agent's response
messages = result["messages"]
if messages:
last_message = messages[-1]
if hasattr(last_message, 'content'):
llm_result = last_message.content
elif isinstance(last_message, dict) and 'content' in last_message:
llm_result = last_message['content']
else:
llm_result = str(last_message)
else:
llm_result = str(result)
else:
llm_result = str(result)
output_classes = match_labels(llm_result, labels)
snippets = search_results.snippets
logger.debug(f'LLM result: {llm_result}')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
langchain
langchain_community
langchain_core
langchain_openai
google-api-python-client
openai

28 changes: 22 additions & 6 deletions label_studio_ml/examples/mmdetection-3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ ARG TEST_ENV
WORKDIR /app

# To fix GPG key error when running apt-get update
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
RUN apt-get update && apt-get install -y --no-install-recommends wget gnupg ca-certificates \
&& rm -rf /var/lib/apt/lists/*

RUN wget -qO - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub | gpg --dearmor -o /usr/share/keyrings/nvidia.gpg \
&& wget -qO - https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub | gpg --dearmor -o /usr/share/keyrings/nvidia-ml.gpg

# Update the base OS
RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
Expand Down Expand Up @@ -38,6 +41,10 @@ ENV PYTHONUNBUFFERED=1 \
RUN --mount=type=cache,target=$PIP_CACHE_DIR \
pip install -U pip

# Install numpy early to avoid dependency conflicts
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install "numpy~=1.26"

# Install base requirements
COPY requirements-base.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
Expand All @@ -48,20 +55,29 @@ COPY requirements.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install -r requirements.txt

# Install test requirements if needed
# Install test requirements if needed (install before mim to ensure numpy is available)
COPY requirements-test.txt .
# build only when TEST_ENV="true"
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
if [ "$TEST_ENV" = "true" ]; then \
pip install -r requirements-test.txt; \
fi

RUN mim install mmengine==0.10.3
RUN mim install mmdet==3.3.0
RUN mim download mmdet --config yolov3_mobilenetv2_8xb24-320-300e_coco --dest .
# Ensure numpy is available and pinned before mim installs (mim packages may depend on it)
RUN python -c "import numpy; print(f'numpy version: {numpy.__version__}')" || pip install "numpy~=1.26"

# Install mim packages, but prevent numpy from being upgraded
RUN mim install mmengine==0.10.3 && \
mim install mmcv==2.1.0 && \
mim install mmdet==3.3.0 && \
pip install --force-reinstall --no-deps "numpy~=1.26" || true
RUN mim download mmdet --config yolov3_mobilenetv2_8xb24-320-300e_coco --dest .

COPY . .

# Final verification that numpy is available (important for tests)
RUN python -c "import numpy; print(f'✓ numpy {numpy.__version__} is available')" || (echo "ERROR: numpy is not available!" && exit 1)

EXPOSE 9090

CMD gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest
pytest-cov
numpy~=1.26
1 change: 0 additions & 1 deletion label_studio_ml/examples/mmdetection-3/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
boto3>=1.26.103,<2.0.0
openmim~=0.3.9
mmcv>=2.0.0rc4,<2.2.0
numpy~=1.26


6 changes: 6 additions & 0 deletions label_studio_ml/examples/mmdetection-3/test_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import requests

# Ensure numpy is available before importing mmdetection (which depends on mmdet)
try:
import numpy
except ImportError:
raise ImportError("numpy is not available. Please install numpy before running tests.")

from mmdetection import MMDetection

from label_studio_ml.utils import compare_nested_structures
Expand Down
16 changes: 12 additions & 4 deletions label_studio_ml/examples/segment_anything_model/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,32 @@ RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
apt-get update; \
apt-get upgrade -y; \
apt install --no-install-recommends -y \
wget git libopencv-dev python3-opencv cmake protobuf-compiler; \
wget git libopencv-dev cmake protobuf-compiler binutils patchelf; \
apt-get autoremove -y

# Copy and run the model download script
COPY download_models.sh .
RUN bash /app/download_models.sh

# Install numpy first to avoid conflicts with system numpy
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install --upgrade pip && \
pip install "numpy>=2,<2.3.0"

# install base requirements
COPY requirements-base.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install --upgrade pip && \
pip install -r requirements-base.txt

# install custom requirements
COPY requirements.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install -r requirements.txt

# Fix executable stack issue with onnxruntime shared library using patchelf
RUN PYTHON_VER=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") && \
find /usr/local/lib/python${PYTHON_VER}/site-packages/onnxruntime/capi -name "onnxruntime_pybind11_state*.so" -exec sh -c 'patchelf --clear-execstack "$1" 2>/dev/null || true' _ {} \; || true

# install test requirements if needed
COPY requirements-test.txt .
# build only when TEST_ENV="true"
Expand All @@ -51,8 +59,8 @@ RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \

COPY . .

# Add ONNX model
RUN python3 onnxconverter.py
# Add ONNX model (skip if it fails - not critical for basic functionality)
RUN python3 onnxconverter.py || echo "Warning: ONNX conversion failed, but continuing build"

EXPOSE 9090

Expand Down
27 changes: 19 additions & 8 deletions label_studio_ml/examples/segment_anything_model/onnxconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,24 @@ def convert(checkpoint_path):
dynamic_axes=dynamic_axes,
)

quantize_dynamic(
model_input=onnx_model_path,
model_output=onnx_model_quantized_path,
optimize_model=True,
per_channel=False,
reduce_range=False,
weight_type=QuantType.QUInt8,
)
# Newer versions of onnxruntime don't have optimize_model parameter
try:
quantize_dynamic(
model_input=onnx_model_path,
model_output=onnx_model_quantized_path,
optimize_model=True,
per_channel=False,
reduce_range=False,
weight_type=QuantType.QUInt8,
)
except TypeError:
# Fallback for newer onnxruntime versions without optimize_model
quantize_dynamic(
model_input=onnx_model_path,
model_output=onnx_model_quantized_path,
per_channel=False,
reduce_range=False,
weight_type=QuantType.QUInt8,
)

convert(VITH_CHECKPOINT)
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
numpy>=2,<2.3.0
label_studio_converter
opencv-python
onnxruntime==1.15.1
onnx
opencv-python-headless>=4.12.0,<5.0.0
onnxruntime>=1.18.0
onnx>=1.15.0
torch==2.0.1
torchvision==0.15.2
gunicorn==22.0.0
Expand All @@ -11,5 +12,3 @@ timm==0.4.12
segment_anything @ git+https://github.com/facebookresearch/segment-anything.git
mobile-sam @ git+https://github.com/ChaoningZhang/MobileSAM.git
label-studio-ml @ git+https://github.com/heartexlabs/label-studio-ml-backend.git

numpy<2
48 changes: 48 additions & 0 deletions label_studio_ml/examples/segment_anything_model/sam_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,52 @@
from label_studio_ml.utils import InMemoryLRUDictCache
from label_studio_sdk._extensions.label_studio_tools.core.utils.io import get_local_path

# Monkey-patch torch.as_tensor to handle numpy 2.x compatibility
_original_as_tensor = torch.as_tensor
def _patched_as_tensor(data, dtype=None, device=None):
"""Patched version of torch.as_tensor that handles numpy 2.x compatibility"""
if isinstance(data, np.ndarray):
# For numpy 2.x compatibility, ensure arrays are properly converted
if dtype is None and data.dtype == np.uint8:
# Explicitly convert uint8 arrays
return _original_as_tensor(data.copy(), dtype=torch.uint8, device=device)
elif dtype is not None:
# If dtype is specified, ensure the array is compatible
if data.dtype == np.float32 and dtype == torch.int:
# Convert float32 to int properly
return _original_as_tensor(data.astype(np.int32), dtype=dtype, device=device)
return _original_as_tensor(data, dtype=dtype, device=device)
torch.as_tensor = _patched_as_tensor

# Also patch tensor.numpy() to handle numpy 2.x compatibility
_original_tensor_numpy = torch.Tensor.numpy
def _patched_tensor_numpy(self, *args, **kwargs):
"""Patched version of tensor.numpy() that handles numpy 2.x compatibility"""
try:
return _original_tensor_numpy(self, *args, **kwargs)
except RuntimeError as e:
if "Numpy is not available" in str(e):
# Fallback: manually convert tensor to numpy array
# This is a workaround for numpy 2.x compatibility issues
arr = self.detach().cpu().contiguous()
# Convert to list first, then to numpy array
if arr.dim() == 0:
return np.array(arr.item())
else:
# Map torch dtypes to numpy dtypes
dtype_map = {
torch.float32: np.float32,
torch.float64: np.float64,
torch.int32: np.int32,
torch.int64: np.int64,
torch.uint8: np.uint8,
torch.bool: np.bool_,
}
np_dtype = dtype_map.get(arr.dtype, None)
return np.array(arr.tolist(), dtype=np_dtype)
raise
torch.Tensor.numpy = _patched_tensor_numpy

logger = logging.getLogger(__name__)
_MODELS_DIR = pathlib.Path(__file__).parent / "models"

Expand Down Expand Up @@ -91,6 +137,8 @@ def set_image(self, img_path, calculate_embeddings=True, task=None):
)
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Ensure image is contiguous and properly typed for numpy 2.x compatibility
image = np.ascontiguousarray(image, dtype=np.uint8)
self.predictor.set_image(image)
payload = {'image_shape': image.shape[:2]}
logger.debug(f'Finished set_image({img_path}) in `IN_MEM_CACHE`: image shape {image.shape[:2]}')
Expand Down
9 changes: 7 additions & 2 deletions label_studio_ml/examples/spacy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
apt-get update; \
apt-get upgrade -y; \
apt install --no-install-recommends -y \
git; \
git \
build-essential \
gcc \
g++; \
apt-get autoremove -y

# install base requirements
Expand All @@ -32,7 +35,9 @@ RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
# install custom requirements
COPY requirements.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install -r requirements.txt
pip install -r requirements.txt && \
apt-get purge -y build-essential gcc g++ && \
apt-get autoremove -y

# install test requirements if needed
COPY requirements-test.txt .
Expand Down
2 changes: 1 addition & 1 deletion label_studio_ml/examples/spacy/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
gunicorn==23.0.0
spacy~=3.6
label-studio-ml @ git+https://github.com/HumanSignal/label-studio-ml-backend.git@master
numpy~=1.26
numpy>=2,<2.3.0


10 changes: 5 additions & 5 deletions label_studio_ml/examples/tesseract/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@ services:
# Do not use 'localhost' as it does not work within Docker containers.
# Use prefix 'http://' or 'https://' for the URL always.
# Determine the actual IP using 'ifconfig' (Linux/Mac) or 'ipconfig' (Windows).
- LABEL_STUDIO_HOST=
- LABEL_STUDIO_HOST=http://app.heartex.com
# specify the access token for the label studio if you use file upload
- LABEL_STUDIO_ACCESS_TOKEN=
# set these variables to use Minio as a storage backend
- AWS_ACCESS_KEY_ID=your-MINIO_ROOT_USER
- AWS_SECRET_ACCESS_KEY=your-MINIO_ROOT_PASSWORD
- AWS_ENDPOINT=http://host.docker.internal:9000
- AWS_ACCESS_KEY_ID=
- AWS_SECRET_ACCESS_KEY=
- AWS_ENDPOINT
extra_hosts:
- "host.docker.internal:host-gateway" # for macos and unix

minio:
container_name: minio
image: bitnami/minio:latest
image: minio/minio:latest
environment:
- MINIO_ROOT_USER=<admin username>
- MINIO_ROOT_PASSWORD=<admin password>
Expand Down
Loading