Skip to content

Commit

Permalink
Merge pull request #451 from SciPhi-AI/Nolan/Telemetry
Browse files Browse the repository at this point in the history
Add basic telemetry
  • Loading branch information
NolanTrem committed Jun 14, 2024
2 parents 536429c + 9d50d41 commit 9528e21
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 13 deletions.
3 changes: 2 additions & 1 deletion docs/pages/deep-dive/_meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
"config": "R2R Config",
"ingestion": "Ingestion Pipeline",
"search": "Search Pipeline",
"rag": "RAG Pipeline"
"rag": "RAG Pipeline",
"telemetry": "Telemetry"
}
27 changes: 27 additions & 0 deletions docs/pages/deep-dive/telemetry.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
## Telemetry

R2R utilizes telemetry that collects **anonymous** usage information.

Why? We use this information to help us understand how R2R is used so that we can prioritize work on new features and bug fixes, allowing us to improve R2R's performance and stability.

### Disabling Telemetry

To disable telemetry, set the environment variable `TELEMETRY_ENABLED` to `false`, `0`, or `f`. You can do this in your terminal before running your application:

```sh copy
export TELEMETRY_ENABLED=false
```

With this setup, telemetry events will not be captured if telemetry is disabled, providing a way for users to control the logging of events.

### What do you collect?

We collect basic information such as:

- **Feature Usage**: Which features are being used and how often.


### Where is telemetry information stored?
We use [Posthog](https://posthog.com/) for our telemetry data.

Posthog is an open source platform for product analytics. Learn more about Posthog on [posthog.com](https://posthog.com/) or [github.com/posthog](https://github.com/posthog).
47 changes: 46 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ionic-api-sdk = {version = "0.9.3", optional = true}
exa-py = {version = "^1.0.9", optional = true}
dateutils = "^0.6.12"
fsspec = "^2024.6.0"
posthog = "^3.5.0"

[tool.poetry.extras]
all = ["tiktoken", "sentence-transformers"]
Expand Down
34 changes: 23 additions & 11 deletions r2r/main/r2r_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
to_async_generator,
)
from r2r.pipes import R2REvalPipe
from r2r.telemetry.telemetry_decorator import telemetry_event

from .r2r_abstractions import R2RPipelines, R2RProviders
from .r2r_config import R2RConfig
Expand Down Expand Up @@ -258,6 +259,7 @@ class UpdatePromptRequest(BaseModel):
template: Optional[str] = None
input_types: Optional[dict[str, str]] = None

@telemetry_event("UpdatePrompt")
async def update_prompt_app(self, request: UpdatePromptRequest):
"""Update a prompt's template and/or input types."""
try:
Expand Down Expand Up @@ -384,6 +386,7 @@ async def aingest_documents(
class IngestDocumentsRequest(BaseModel):
documents: list[Document]

@telemetry_event("IngestDocuments")
async def ingest_documents_app(self, request: IngestDocumentsRequest):
async with manage_run(
self.run_manager, "ingest_documents_app"
Expand Down Expand Up @@ -480,6 +483,7 @@ async def aupdate_documents(
class UpdateDocumentsRequest(BaseModel):
documents: list[Document]

@telemetry_event("UpdateDocuments")
async def update_documents_app(self, request: UpdateDocumentsRequest):
async with manage_run(
self.run_manager, "update_documents_app"
Expand Down Expand Up @@ -550,9 +554,7 @@ async def aingest_files(
> self.config.app.get("max_file_size_in_mb", 32)
* MB_CONVERSION_FACTOR
):
logger.error(
f"File size exceeds limit: {file.filename}"
)
logger.error(f"File size exceeds limit: {file.filename}")
raise HTTPException(
status_code=413,
detail="File size exceeds maximum allowed size.",
Expand All @@ -575,11 +577,10 @@ async def aingest_files(
status_code=415,
detail=f"'{file_extension}' is not a valid DocumentType.",
)
if (
DocumentType[file_extension.upper()]
in excluded_parsers
):
logger.error(f"{file_extension} is explicitly excluded in the configuration file.")
if DocumentType[file_extension.upper()] in excluded_parsers:
logger.error(
f"{file_extension} is explicitly excluded in the configuration file."
)
raise HTTPException(
status_code=415,
detail=f"{file_extension} is explicitly excluded in the configuration file.",
Expand Down Expand Up @@ -665,9 +666,7 @@ async def aingest_files(
)

if not skip_document_info:
self.providers.vector_db.upsert_documents_info(
document_infos
)
self.providers.vector_db.upsert_documents_info(document_infos)

return {
"processed_documents": [
Expand All @@ -686,6 +685,7 @@ async def aingest_files(
for file in files:
file.file.close()

@telemetry_event("IngestFiles")
async def ingest_files_app(
self,
files: list[UploadFile] = File(...),
Expand Down Expand Up @@ -854,6 +854,7 @@ class UpdateFilesRequest(BaseModel):
metadatas: Optional[str] = Form(None)
ids: str = Form("")

@telemetry_event("UpdateFiles")
async def update_files_app(
self,
files: list[UploadFile] = File(...),
Expand Down Expand Up @@ -943,6 +944,7 @@ class SearchRequest(BaseModel):
search_limit: int = 10
do_hybrid_search: Optional[bool] = False

@telemetry_event("Search")
async def search_app(self, request: SearchRequest):
async with manage_run(self.run_manager, "search_app") as run_id:
try:
Expand Down Expand Up @@ -1058,6 +1060,7 @@ class RAGRequest(BaseModel):
rag_generation_config: Optional[str] = None
streaming: Optional[bool] = None

@telemetry_event("RAG")
async def rag_app(self, request: RAGRequest):
async with manage_run(self.run_manager, "rag_app") as run_id:
try:
Expand Down Expand Up @@ -1167,6 +1170,7 @@ class EvalRequest(BaseModel):
context: str
completion: str

@telemetry_event("Evaluate")
async def evaluate_app(self, request: EvalRequest):
async with manage_run(self.run_manager, "evaluate_app") as run_id:
try:
Expand Down Expand Up @@ -1208,6 +1212,7 @@ class DeleteRequest(BaseModel):
keys: list[str]
values: list[Union[bool, int, str]]

@telemetry_event("Delete")
async def delete_app(self, request: DeleteRequest = Body(...)):
try:
return await self.adelete(request.keys, request.values)
Expand Down Expand Up @@ -1266,6 +1271,7 @@ async def alogs(

return {"results": aggregated_logs}

@telemetry_event("Logs")
async def logs_app(
self,
log_type_filter: Optional[str] = Query(None),
Expand Down Expand Up @@ -1363,6 +1369,7 @@ async def aanalytics(

return {"results": results}

@telemetry_event("Analytics")
async def analytics_app(
self,
filter_criteria: FilterCriteria = Body(...),
Expand Down Expand Up @@ -1390,6 +1397,7 @@ async def aapp_settings(self, *args: Any, **kwargs: Any):
}
}

@telemetry_event("AppSettings")
async def app_settings_app(self):
"""Return the config.json and all prompts."""
try:
Expand All @@ -1404,6 +1412,7 @@ async def ausers_stats(self, user_ids: Optional[list[uuid.UUID]] = None):
[str(ele) for ele in user_ids]
)

@telemetry_event("UsersStats")
async def users_stats_app(
self, user_ids: Optional[list[uuid.UUID]] = Query(None)
):
Expand Down Expand Up @@ -1433,6 +1442,7 @@ async def adocuments_info(
),
)

@telemetry_event("DocumentsInfo")
async def documents_info_app(
self,
document_ids: Optional[list[str]] = Query(None),
Expand All @@ -1453,6 +1463,7 @@ async def documents_info_app(
async def adocument_chunks(self, document_id: str) -> list[str]:
return self.providers.vector_db.get_document_chunks(document_id)

@telemetry_event("DocumentChunks")
async def document_chunks_app(self, document_id: str):
try:
chunks = await self.adocument_chunks(document_id)
Expand All @@ -1463,6 +1474,7 @@ async def document_chunks_app(self, document_id: str):
)
raise HTTPException(status_code=500, detail=str(e)) from e

@telemetry_event("OpenAPI")
def openapi_spec_app(self):
from fastapi.openapi.utils import get_openapi

Expand Down
Empty file added r2r/telemetry/__init__.py
Empty file.
59 changes: 59 additions & 0 deletions r2r/telemetry/events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import Any, Dict
import uuid


class BaseTelemetryEvent:
def __init__(self, event_type: str, properties: Dict[str, Any]):
self.event_type = event_type
self.properties = properties
self.event_id = str(uuid.uuid4())


class DailyActiveUserEvent(BaseTelemetryEvent):
def __init__(self, user_id: str):
super().__init__("DailyActiveUser", {"user_id": user_id})


class FeatureUsageEvent(BaseTelemetryEvent):
def __init__(self, user_id: str, feature: str):
super().__init__(
"FeatureUsage", {"user_id": user_id, "feature": feature}
)


class ErrorEvent(BaseTelemetryEvent):
def __init__(self, user_id: str, endpoint: str, error_message: str):
super().__init__(
"Error",
{
"user_id": user_id,
"endpoint": endpoint,
"error_message": error_message,
},
)


class RequestLatencyEvent(BaseTelemetryEvent):
def __init__(self, endpoint: str, latency: float):
super().__init__(
"RequestLatency", {"endpoint": endpoint, "latency": latency}
)


class GeographicDistributionEvent(BaseTelemetryEvent):
def __init__(self, user_id: str, country: str):
super().__init__(
"GeographicDistribution", {"user_id": user_id, "country": country}
)


class SessionDurationEvent(BaseTelemetryEvent):
def __init__(self, user_id: str, duration: float):
super().__init__(
"SessionDuration", {"user_id": user_id, "duration": duration}
)


class UserPathEvent(BaseTelemetryEvent):
def __init__(self, user_id: str, path: str):
super().__init__("UserPath", {"user_id": user_id, "path": path})
42 changes: 42 additions & 0 deletions r2r/telemetry/posthog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import logging
import os
import posthog
from r2r.telemetry.events import BaseTelemetryEvent

logger = logging.getLogger(__name__)


class PosthogClient:
"""
This is a write-only project API key, so it can only create new events. It can't
read events or any of your other data stored with PostHog, so it's safe to use in public apps.
"""

def __init__(self, api_key: str, enabled: bool = True):
self.enabled = enabled
if self.enabled:
logger.info(
"Initializing anonymized telemetry. To disable, set TELEMETRY_ENABLED=false in your environment."
)
posthog.project_api_key = api_key
else:
posthog.disabled = True
logger.info(
f"Posthog telemetry {'enabled' if self.enabled else 'disabled'}"
)

def capture(self, event: BaseTelemetryEvent):
if self.enabled:
posthog.capture(event.event_id, event.event_type, event.properties)


# Initialize the telemetry client with a flag to enable or disable telemetry
telemetry_enabled = os.getenv("TELEMETRY_ENABLED", "true").lower() in (
"true",
"1",
"t",
)
telemetry_client = PosthogClient(
api_key="phc_OPBbibOIErCGc4NDLQsOrMuYFTKDmRwXX6qxnTr6zpU",
enabled=telemetry_enabled,
)
Loading

0 comments on commit 9528e21

Please sign in to comment.