"""Minimal example: trace Cleanlab TLM calls with Langtrace.

Loads environment variables from the nearest ``.env`` file, initialises
Langtrace, then issues one ``prompt`` call and one
``get_trustworthiness_score`` call and prints both results.
"""
import os

from cleanlab_tlm import TLM
from dotenv import find_dotenv, load_dotenv

from langtrace_python_sdk import langtrace

# Environment (including CLEANLAB_API_KEY) is read from the discovered .env file.
load_dotenv(find_dotenv())

# Must run before any TLM call so the instrumented methods are in place.
langtrace.init()

tlm_options = {"log": ["explanation"], "model": "gpt-4o-mini"}  # GPT, Claude, etc
tlm = TLM(api_key=os.getenv("CLEANLAB_API_KEY"), options=tlm_options)

out = tlm.prompt("What's the third month of the year alphabetically?")
trustworthiness_score = tlm.get_trustworthiness_score(
    "What's the first month of the year?",
    response="January",
)

for value in (out, trustworthiness_score):
    print(value)
a/src/langtrace_python_sdk/constants/instrumentation/common.py +++ b/src/langtrace_python_sdk/constants/instrumentation/common.py @@ -13,6 +13,8 @@ "ARCH": "Arch", "AZURE": "Azure", "CHROMA": "Chroma", + "CLEANLAB": "CleanLab", + "COHERE": "Cohere", "CREWAI": "CrewAI", "DEEPSEEK": "DeepSeek", "DSPY": "DSPy", @@ -25,7 +27,6 @@ "LLAMAINDEX": "LlamaIndex", "OPENAI": "OpenAI", "PINECONE": "Pinecone", - "COHERE": "Cohere", "PPLX": "Perplexity", "QDRANT": "Qdrant", "WEAVIATE": "Weaviate", diff --git a/src/langtrace_python_sdk/instrumentation/__init__.py b/src/langtrace_python_sdk/instrumentation/__init__.py index 567cdd94..b23f47ca 100644 --- a/src/langtrace_python_sdk/instrumentation/__init__.py +++ b/src/langtrace_python_sdk/instrumentation/__init__.py @@ -1,8 +1,10 @@ +from .agno import AgnoInstrumentation from .anthropic import AnthropicInstrumentation from .autogen import AutogenInstrumentation from .aws_bedrock import AWSBedrockInstrumentation from .cerebras import CerebrasInstrumentation from .chroma import ChromaInstrumentation +from .cleanlab import CleanLabInstrumentation from .cohere import CohereInstrumentation from .crewai import CrewAIInstrumentation from .crewai_tools import CrewaiToolsInstrumentation @@ -10,6 +12,7 @@ from .embedchain import EmbedchainInstrumentation from .gemini import GeminiInstrumentation from .google_genai import GoogleGenaiInstrumentation +from .graphlit import GraphlitInstrumentation from .groq import GroqInstrumentation from .langchain import LangchainInstrumentation from .langchain_community import LangchainCommunityInstrumentation @@ -21,17 +24,12 @@ from .mistral import MistralInstrumentation from .ollama import OllamaInstrumentor from .openai import OpenAIInstrumentation +from .phidata import PhiDataInstrumentation from .pinecone import PineconeInstrumentation from .pymongo import PyMongoInstrumentation from .qdrant import QdrantInstrumentation from .vertexai import VertexAIInstrumentation from .weaviate import 
WeaviateInstrumentation -from .cerebras import CerebrasInstrumentation -from .milvus import MilvusInstrumentation -from .google_genai import GoogleGenaiInstrumentation -from .graphlit import GraphlitInstrumentation -from .phidata import PhiDataInstrumentation -from .agno import AgnoInstrumentation __all__ = [ "AnthropicInstrumentation", @@ -65,4 +63,5 @@ "GraphlitInstrumentation", "PhiDataInstrumentation", "AgnoInstrumentation", + "CleanLabInstrumentation", ] diff --git a/src/langtrace_python_sdk/instrumentation/cleanlab/__init__.py b/src/langtrace_python_sdk/instrumentation/cleanlab/__init__.py new file mode 100644 index 00000000..4fc1e445 --- /dev/null +++ b/src/langtrace_python_sdk/instrumentation/cleanlab/__init__.py @@ -0,0 +1,5 @@ +from .instrumentation import CleanLabInstrumentation + +__all__ = [ + "CleanLabInstrumentation", +] diff --git a/src/langtrace_python_sdk/instrumentation/cleanlab/instrumentation.py b/src/langtrace_python_sdk/instrumentation/cleanlab/instrumentation.py new file mode 100644 index 00000000..68ec4bbf --- /dev/null +++ b/src/langtrace_python_sdk/instrumentation/cleanlab/instrumentation.py @@ -0,0 +1,62 @@ +""" +Copyright (c) 2025 Scale3 Labs +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
class CleanLabInstrumentation(BaseInstrumentor):  # type: ignore
    """OpenTelemetry instrumentor for the Cleanlab TLM client.

    Wraps a fixed set of ``cleanlab_tlm.tlm.TLM`` methods with
    ``generic_patch`` so that each call is executed inside a span.
    """

    # Module that defines ``TLM`` and the ``TLM`` methods that get wrapped.
    # Shared by ``_instrument`` and ``_uninstrument`` so both stay in sync.
    _TLM_MODULE = "cleanlab_tlm.tlm"
    _TRACED_METHODS = (
        "prompt",
        "get_trustworthiness_score",
        "try_prompt",
        "try_get_trustworthiness_score",
    )

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the package requirements this instrumentor declares."""
        return ["cleanlab-tlm >= 1.0.7", "trace-attributes >= 4.0.5"]

    def _instrument(self, **kwargs: Any) -> None:
        """Wrap every method in ``_TRACED_METHODS`` with a tracing wrapper.

        Args:
            **kwargs: May contain ``tracer_provider``; when absent, ``None``
                is passed on to ``get_tracer``.
        """
        tracer_provider: Optional[TracerProvider] = kwargs.get("tracer_provider")
        tracer = get_tracer(__name__, "", tracer_provider)
        version: str = importlib.metadata.version("cleanlab_tlm")

        for method_name in self._TRACED_METHODS:
            wrap_function_wrapper(
                self._TLM_MODULE,
                f"TLM.{method_name}",
                generic_patch(version, tracer),
            )

    def _uninstrument(self, **kwargs: Any) -> None:
        """Restore the original ``TLM`` methods.

        Without this, ``uninstrument()`` followed by ``instrument()`` would
        stack a second wrapper on each method and emit duplicate spans.
        """
        # Local import: only needed on this path, keeps the module's
        # top-level import block untouched.
        from opentelemetry.instrumentation.utils import unwrap

        tlm_class = importlib.import_module(self._TLM_MODULE).TLM
        for method_name in self._TRACED_METHODS:
            # ``unwrap`` leaves the attribute alone if it is not wrapped.
            unwrap(tlm_class, method_name)
def _stringify_result(result: Any) -> str:
    """Return the string stored in the ``tlm.result`` span attribute.

    Objects exposing ``model_dump_json`` are converted through their JSON
    form, dicts are used as-is, and anything else is normalised through a
    JSON round-trip (``default=str``). If that conversion fails, the plain
    ``str(result)`` is used instead.
    """
    try:
        if hasattr(result, "model_dump_json"):
            return str(json.loads(result.model_dump_json()))
        if isinstance(result, dict):
            return str(result)
        return str(json.loads(json.dumps(result, default=str)))
    except Exception:  # pylint: disable=W0718
        # Best-effort: any conversion problem falls back to the plain string.
        return str(result)


def generic_patch(version: str, tracer: Tracer) -> Callable:
    """Build a wrapper that traces one call to a wrapped ``TLM`` method.

    The returned callable has the ``(wrapped, instance, args, kwargs)``
    signature expected by ``wrapt.wrap_function_wrapper``. The span is named
    ``tlm.<wrapped.__name__>``, so the same factory serves every instrumented
    method.

    Args:
        version: Version string recorded as ``langtrace.service.version``.
        tracer: Tracer used to start the span.

    Returns:
        The wrapper function.
    """

    def traced_method(
        wrapped: Callable,
        instance: Any,
        args: List[Any],
        kwargs: dict,
    ) -> Any:
        """Run ``wrapped`` inside a CLIENT span and record inputs and result.

        Exceptions raised by ``wrapped`` are recorded on the span and
        re-raised unchanged.
        """
        service_provider = SERVICE_PROVIDERS["CLEANLAB"]
        extra_attributes = baggage.get_baggage(LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY)
        span_attributes = {
            "langtrace.sdk.name": "langtrace-python-sdk",
            "langtrace.service.name": service_provider,
            "langtrace.service.type": "framework",
            "langtrace.service.version": version,
            "langtrace.version": v(LANGTRACE_SDK_NAME),
            **(extra_attributes if extra_attributes is not None else {}),
        }

        span_attributes["tlm.metadata"] = serialize_kwargs(**kwargs)
        span_attributes["tlm.inputs"] = serialize_args(*args)

        attributes = FrameworkSpanAttributes(**span_attributes)

        with tracer.start_as_current_span(
            name=f"tlm.{wrapped.__name__}", kind=SpanKind.CLIENT
        ) as span:
            try:
                set_span_attributes(span, attributes)
                result = wrapped(*args, **kwargs)
            except Exception as err:
                span.record_exception(err)
                span.set_status(Status(StatusCode.ERROR, str(err)))
                raise

            # The call itself succeeded. Recording the result is kept outside
            # the try above so that a serialisation problem can neither fail
            # the caller's call nor mark the span as an error.
            if result is not None:
                try:
                    span.set_attribute("tlm.result", _stringify_result(result))
                except Exception as err:  # pylint: disable=W0718
                    # Keep a trace of the failure without propagating it.
                    span.record_exception(err)

            # Set on every successful call, including falsy results such as
            # an empty list.
            span.set_status(Status(StatusCode.OK))
            return result

    return traced_method
import ( - BatchSpanProcessor, - ConsoleSpanExporter, - SimpleSpanProcessor, -) - -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( - OTLPSpanExporter as GRPCExporter, -) -from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( - OTLPSpanExporter as HTTPExporter, -) -from langtrace_python_sdk.constants.exporter.langtrace_exporter import ( - LANGTRACE_REMOTE_URL, - LANGTRACE_SESSION_ID_HEADER, -) -from langtrace_python_sdk.instrumentation import ( - AnthropicInstrumentation, - ChromaInstrumentation, - CohereInstrumentation, - CrewAIInstrumentation, - DspyInstrumentation, - EmbedchainInstrumentation, - GeminiInstrumentation, - GroqInstrumentation, - LangchainCommunityInstrumentation, - LangchainCoreInstrumentation, - LangchainInstrumentation, - LanggraphInstrumentation, - LiteLLMInstrumentation, - LlamaindexInstrumentation, - MistralInstrumentation, - AWSBedrockInstrumentation, - OllamaInstrumentor, - OpenAIInstrumentation, - PineconeInstrumentation, - QdrantInstrumentation, - AutogenInstrumentation, - VertexAIInstrumentation, - WeaviateInstrumentation, - PyMongoInstrumentation, - CerebrasInstrumentation, - MilvusInstrumentation, - GoogleGenaiInstrumentation, - GraphlitInstrumentation, - PhiDataInstrumentation, - AgnoInstrumentation, -) +from opentelemetry.sdk.trace.export import (BatchSpanProcessor, + ConsoleSpanExporter, + SimpleSpanProcessor) from opentelemetry.util.re import parse_env_headers from sentry_sdk.types import Event, Hint @@ -86,17 +41,18 @@ from langtrace_python_sdk.extensions.langtrace_exporter import \ LangTraceExporter from langtrace_python_sdk.instrumentation import ( - AnthropicInstrumentation, AutogenInstrumentation, + AgnoInstrumentation, AnthropicInstrumentation, AutogenInstrumentation, AWSBedrockInstrumentation, CerebrasInstrumentation, ChromaInstrumentation, - CohereInstrumentation, CrewAIInstrumentation, CrewaiToolsInstrumentation, - DspyInstrumentation, EmbedchainInstrumentation, GeminiInstrumentation, - 
GoogleGenaiInstrumentation, GroqInstrumentation, - LangchainCommunityInstrumentation, LangchainCoreInstrumentation, - LangchainInstrumentation, LanggraphInstrumentation, LiteLLMInstrumentation, + CleanLabInstrumentation, CohereInstrumentation, CrewAIInstrumentation, + CrewaiToolsInstrumentation, DspyInstrumentation, EmbedchainInstrumentation, + GeminiInstrumentation, GoogleGenaiInstrumentation, GraphlitInstrumentation, + GroqInstrumentation, LangchainCommunityInstrumentation, + LangchainCoreInstrumentation, LangchainInstrumentation, + LanggraphInstrumentation, LiteLLMInstrumentation, LlamaindexInstrumentation, MilvusInstrumentation, MistralInstrumentation, - OllamaInstrumentor, OpenAIInstrumentation, PineconeInstrumentation, - PyMongoInstrumentation, QdrantInstrumentation, VertexAIInstrumentation, - WeaviateInstrumentation) + OllamaInstrumentor, OpenAIInstrumentation, PhiDataInstrumentation, + PineconeInstrumentation, PyMongoInstrumentation, QdrantInstrumentation, + VertexAIInstrumentation, WeaviateInstrumentation) from langtrace_python_sdk.types import (DisableInstrumentations, InstrumentationMethods) from langtrace_python_sdk.utils import (check_if_sdk_is_outdated, @@ -334,6 +290,7 @@ def init( "cerebras-cloud-sdk": CerebrasInstrumentation(), "pymilvus": MilvusInstrumentation(), "crewai-tools": CrewaiToolsInstrumentation(), + "cleanlab-tlm": CleanLabInstrumentation(), } init_instrumentations(config.disable_instrumentations, all_instrumentations) diff --git a/src/langtrace_python_sdk/version.py b/src/langtrace_python_sdk/version.py index 32a78190..e4e78c0b 100644 --- a/src/langtrace_python_sdk/version.py +++ b/src/langtrace_python_sdk/version.py @@ -1 +1 @@ -__version__ = "3.8.0" +__version__ = "3.8.1"