In [10]:
import os
from typing import List, Dict
from dotenv import load_dotenv

from langfuse import Langfuse
import pandas as pd
import tiktoken

from config.base_config import rag_config
from prompts.rag import RAG_SYSTEM_PROMPT_FR

In [2]:
# Load environment variables through python-dotenv before reading them.
load_dotenv()

# Langfuse credentials; each one is None when the variable is not set.
LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
# Address of the Langfuse instance the notebook talks to.
LANGFUSE_HOST = "http://localhost:3000"

In [3]:
# Client used by every fetch_traces / fetch_observations call in this notebook.
langfuse = Langfuse(
    host=LANGFUSE_HOST,
    public_key=LANGFUSE_PUBLIC_KEY,
    secret_key=LANGFUSE_SECRET_KEY,
)

In [4]:
# Tokenizer used for all token counts below; tiktoken picks the encoding
# from the model name.
tokenizer = tiktoken.encoding_for_model("gpt-4o-mini")

In [5]:
from enum import Enum

# get cached input tokens
# system prompt for rag/retrievers/etc. depending on user selection

# model providers
class ModelProvider(Enum):
    """LLM providers that can be priced; each member is a key of the pricing table."""
    OPENAI = "openai"
    AZUREOPENAI = "azure_openai"
    ANTHROPIC = "anthropic"
    GROQ = "groq"
    GEMINI = "gemini"
    MISTRAL = "mistral"

# get output tokens
class ModelPricingService:
    """Turn token counts into an estimated cost for a given model provider.

    ``_PRICING`` stores a per-token price for uncached input (``"IN"``),
    cached input (``"IN_CACHED"``) and output (``"OUT"``) tokens. Each entry
    is written as ``<rate> / 1_000_000``, i.e. a rate quoted per one million
    tokens divided down to a single token.

    NOTE(review): every provider currently carries the same three rates -
    presumably placeholders until provider-specific prices are filled in.
    """

    # The divisor must be 1_000_000. The previous literal ``1_000_0000`` is
    # 10,000,000, which made every computed cost ten times too small.
    _PRICING = {
        ModelProvider.OPENAI: {
            "IN": 0.15 / 1_000_000,
            "IN_CACHED": 0.075 / 1_000_000,
            "OUT": 0.6 / 1_000_000,
        },
        ModelProvider.AZUREOPENAI: {
            "IN": 0.15 / 1_000_000,
            "IN_CACHED": 0.075 / 1_000_000,
            "OUT": 0.6 / 1_000_000,
        },
        ModelProvider.ANTHROPIC: {
            "IN": 0.15 / 1_000_000,
            "IN_CACHED": 0.075 / 1_000_000,
            "OUT": 0.6 / 1_000_000,
        },
        ModelProvider.GROQ: {
            "IN": 0.15 / 1_000_000,
            "IN_CACHED": 0.075 / 1_000_000,
            "OUT": 0.6 / 1_000_000,
        },
        ModelProvider.GEMINI: {
            "IN": 0.15 / 1_000_000,
            "IN_CACHED": 0.075 / 1_000_000,
            "OUT": 0.6 / 1_000_000,
        },
        ModelProvider.MISTRAL: {
            "IN": 0.15 / 1_000_000,
            "IN_CACHED": 0.075 / 1_000_000,
            "OUT": 0.6 / 1_000_000,
        },
    }

    @classmethod
    def _token_cost(cls, n_tokens: int, model_provider: ModelProvider, kind: str) -> float:
        """Return ``n_tokens`` priced at the ``kind`` rate of ``model_provider``.

        Raises:
            KeyError: if the provider or the rate kind is not in ``_PRICING``.
        """
        # Index directly instead of .get(): a missing rate should fail with a
        # KeyError, not with "NoneType * int".
        return cls._PRICING[model_provider][kind] * n_tokens

    @classmethod
    def get_input_cost(cls, n_tokens: int, model_provider: ModelProvider, **kwargs) -> float:
        """Cost of ``n_tokens`` uncached input tokens. Extra kwargs are ignored."""
        return cls._token_cost(n_tokens, model_provider, "IN")

    @classmethod
    def get_input_cached_cost(cls, n_tokens: int, model_provider: ModelProvider, **kwargs) -> float:
        """Cost of ``n_tokens`` cached input tokens. Extra kwargs are ignored."""
        return cls._token_cost(n_tokens, model_provider, "IN_CACHED")

    @classmethod
    def get_output_cost(cls, n_tokens: int, model_provider: ModelProvider, **kwargs) -> float:
        """Cost of ``n_tokens`` output tokens. Extra kwargs are ignored."""
        return cls._token_cost(n_tokens, model_provider, "OUT")

    @classmethod
    def get_total_cost(cls, n_input_tokens: int, n_input_cached_tokens: int, n_output_tokens: int, model_provider: ModelProvider, **kwargs) -> Dict[str, float]:
        """Price one request and return the cost breakdown.

        Args:
            n_input_tokens: number of uncached input tokens.
            n_input_cached_tokens: number of cached input tokens.
            n_output_tokens: number of output tokens.
            model_provider: provider whose rates are applied.

        Returns:
            A dict with the keys ``"n_input_tokens_cost"``,
            ``"n_input_cached_tokens_cost"``, ``"n_output_tokens_cost"`` and
            ``"total_cost"`` (the sum of the other three).

        Raises:
            KeyError: if ``model_provider`` is not in ``_PRICING``.
        """
        n_input_tokens_cost = cls.get_input_cost(n_input_tokens, model_provider)
        n_input_cached_tokens_cost = cls.get_input_cached_cost(n_input_cached_tokens, model_provider)
        n_output_tokens_cost = cls.get_output_cost(n_output_tokens, model_provider)

        return {
            "n_input_tokens_cost": n_input_tokens_cost,
            "n_input_cached_tokens_cost": n_input_cached_tokens_cost,
            "n_output_tokens_cost": n_output_tokens_cost,
            "total_cost": n_input_tokens_cost + n_input_cached_tokens_cost + n_output_tokens_cost,
        }

In [8]:
import itertools

def longest_common_prefix(str1, str2):
    """Return the leading run of characters that ``str1`` and ``str2`` share.

    The two inputs are compared position by position; the result stops at the
    first mismatch or at the end of the shorter input, and is ``""`` when the
    first characters already differ.
    """
    shared = []
    for left, right in zip(str1, str2):
        if left != right:
            break
        shared.append(left)
    return ''.join(shared)

# Get input

In [None]:
# Fetch traces from Langfuse and keep the `.data` payload of the response.
traces = langfuse.fetch_traces().data

In [None]:
# Observations recorded under the prompt-building step; display how many came back.
messages = langfuse.fetch_observations(name="MessageBuilder_build_chat_prompt")
len(messages.data)

In [None]:
def get_input_tokens(messages):
    """Count cached and uncached prompt tokens for the first fetched observation.

    Only ``messages.data[0].output`` is inspected. For a system message, the
    part it shares with the start of ``RAG_SYSTEM_PROMPT_FR`` is counted as
    cached input and the remainder as uncached input. A user message is
    counted entirely as uncached input. Messages with any other role are
    ignored. If several messages share a role, the last one wins.

    The cached prefix and the remainder are tokenized separately, so the two
    counts are an estimate and may not add up exactly to the token count of
    the full message.

    Args:
        messages: response of ``langfuse.fetch_observations``; assumes each
            entry of ``data[0].output`` is a dict with ``"role"`` and
            ``"content"`` keys - TODO confirm against the traced payload.

    Returns:
        ``{"n_input_cached_tokens": int, "n_input_tokens": int}``.

    Raises:
        IndexError: if ``messages.data`` is empty.
    """
    # Start from zero so a prompt without a system or user message yields
    # zero counts instead of an UnboundLocalError.
    n_cached_tokens = 0
    n_system_tokens = 0
    n_query_tokens = 0

    for message in messages.data[0].output:
        if message["role"] == "system":
            content = message["content"]

            # find longest common prefix with the static system prompt
            prefix = longest_common_prefix(content, RAG_SYSTEM_PROMPT_FR)

            # cached input tokens
            n_cached_tokens = len(tokenizer.encode(prefix))

            # Uncached part of the system message. Slice the prefix off:
            # str.replace would also delete every later occurrence of the
            # prefix text (harmful when the shared prefix is short).
            n_system_tokens = len(tokenizer.encode(content[len(prefix):]))

        elif message["role"] == "user":
            # query input tokens
            n_query_tokens = len(tokenizer.encode(message["content"]))

    return {
        "n_input_cached_tokens": n_cached_tokens,
        "n_input_tokens": n_system_tokens + n_query_tokens,
    }

# Token counts for the first prompt observation; note that this is a plain dict.
input_tokens = get_input_tokens(messages)
input_tokens

In [14]:
# Observations recorded under the streamed-output step.
output_stream = langfuse.fetch_observations(name="openai_output_stream")

In [None]:
def get_output_tokens(output_stream):
    """Count the non-empty chunks of the first streamed-output observation.

    Every entry of ``output_stream.data[0].output`` whose ``"content"`` is
    truthy counts as one output token; empty or ``None`` content is skipped.

    Returns:
        ``{"n_output_tokens": int}``.
    """
    chunks = output_stream.data[0].output
    n_nonempty_chunks = sum(1 for chunk in chunks if chunk["content"])
    return {"n_output_tokens": n_nonempty_chunks}

# Output token count for the first streamed-output observation.
output_tokens = get_output_tokens(output_stream)
output_tokens

In [None]:
# Price the request from the token counts returned by get_input_tokens and
# get_output_tokens.
ModelPricingService.get_total_cost(
    n_input_tokens=input_tokens["n_input_tokens"],
    n_input_cached_tokens=input_tokens["n_input_cached_tokens"],
    n_output_tokens=output_tokens["n_output_tokens"],
    model_provider=ModelProvider.OPENAI
)

# Get cached input

In [None]:
# Pick the system message of the first prompt observation. Iterate over
# `messages` (the fetch_observations response): `input_tokens` is the plain
# dict returned by get_input_tokens and has no `.data` attribute.
for message in messages.data[0].output:
    if message["role"] == "system":
        break
else:
    # Without this, `message` would silently stay bound to the last entry.
    raise LookupError("no system message found in the first observation")

In [None]:
# cached input: the part of the system message shared with the start of the
# static system prompt
cached_input = longest_common_prefix(message["content"], RAG_SYSTEM_PROMPT_FR)
cached_input

In [None]:
# Number of tokens in the cached prefix.
n_input_cached_tokens = len(tokenizer.encode(cached_input))
n_input_cached_tokens

In [None]:
# non-cached input: whatever follows the cached prefix in the system message.
# `cached_input` is the prefix computed in this notebook scope (`prefix` only
# exists inside get_input_tokens). Slicing removes just the leading prefix,
# whereas str.replace would delete every occurrence of it.
non_cached_input = message["content"][len(cached_input):]
non_cached_input

In [None]:
# Pick the user message of the first prompt observation. Iterate over
# `messages` (the fetch_observations response): `input_tokens` is the plain
# dict returned by get_input_tokens and has no `.data` attribute.
for message in messages.data[0].output:
    if message["role"] == "user":
        break
else:
    # Without this, `message` would silently stay bound to the last entry.
    raise LookupError("no user message found in the first observation")

In [None]:
# Number of tokens in the currently selected message's content.
n_query_input_tokens = len(tokenizer.encode(message["content"]))
n_query_input_tokens

In [None]:
# Uncached input = non-cached part of the system message + the query tokens.
n_input_tokens = len(tokenizer.encode(non_cached_input)) + n_query_input_tokens
n_input_tokens

# Get output

In [19]:
# Re-fetch traces from Langfuse and keep the `.data` payload of the response.
traces = langfuse.fetch_traces().data

In [None]:
# Inline version of get_output_tokens: each entry of the first observation's
# output with truthy "content" counts as one output token.
output_stream = langfuse.fetch_observations(name="openai_output_stream")
output_tokens = [tok for tok in output_stream.data[0].output if tok["content"]]
n_output_tokens = len(output_tokens)
n_output_tokens

In [None]:
# Display the raw response for inspection; the result is not stored.
langfuse.fetch_observations(name="openai_output_stream")

# Cost

In [None]:
# Price the request from the step-by-step token counts computed in the
# preceding cells.
ModelPricingService.get_total_cost(
    n_input_tokens=n_input_tokens,
    n_input_cached_tokens=n_input_cached_tokens,
    n_output_tokens=n_output_tokens,
    model_provider=ModelProvider.OPENAI
)

# Retrievers

In [None]:
# Observations recorded under the name "retrieve".
observations = langfuse.fetch_observations(name="retrieve")

# Source validation

# Topic check

In [None]:
# Alternative that names the encoding explicitly instead of resolving it
# from the model name:
#tokenizer = tiktoken.get_encoding("o200k_base")
tokenizer = tiktoken.encoding_for_model("gpt-4o-mini")

In [None]:
# Per-model token prices, keyed by model name. Values are rates per
# 1,000,000 tokens (get_cost divides by 1_000_000); presumably USD - confirm.
# NOTE(review): "gpt-4-turbo-preview", "gpt-4-0613", "gpt-4-0314" and
# "gpt-3.5-turbo" appear in the encoding lists of this notebook but have no
# entry here, so pricing[model] raises KeyError for them.
pricing = {
    "gpt-4o": {
        "input": 5,
        "output": 15
    },
    "gpt-4o-2024-08-06": {
        "input": 2.5,
        "output": 10
    },
    "gpt-4o-2024-05-13": {
        "input": 5,
        "output": 15
    },
    "gpt-4o-mini": {
        "input": 0.15,
        "output": 0.6
    },
    "gpt-4o-mini-2024-07-18": {
        "input": 0.15,
        "output": 0.6
    },
    "chatgpt-4o-latest": {
        "input": 5.00,
        "output": 15.00
    },
    "gpt-4-turbo": {
        "input": 10.00,
        "output": 30.00
    },
    "gpt-4-turbo-2024-04-09": {
        "input": 10.00,
        "output": 30.00
    },
    "gpt-4": {
        "input": 30.00,
        "output": 60.00
    },
    "gpt-4-32k": {
        "input": 60.00,
        "output": 120.00
    },
    "gpt-4-0125-preview": {
        "input": 10.00,
        "output": 30.00
    },
    "gpt-4-1106-preview": {
        "input": 10.00,
        "output": 30.00
    },
    "gpt-4-vision-preview": {
        "input": 10.00,
        "output": 30.00
    },
    "gpt-3.5-turbo-0125": {
        "input": 0.50,
        "output": 1.50
    },
    "gpt-3.5-turbo-instruct": {
        "input": 1.50,
        "output": 2.00
    },
    "gpt-3.5-turbo-1106": {
        "input": 1.00,
        "output": 2.00
    },
    "gpt-3.5-turbo-0613": {
        "input": 1.50,
        "output": 2.00
    },
    "gpt-3.5-turbo-16k-0613": {
        "input": 3.00,
        "output": 4.00
    },
    "gpt-3.5-turbo-0301": {
        "input": 1.50,
        "output": 2.00
    }
 }

In [None]:
# Model configured for the RAG pipeline; it selects both the tokenizer
# encoding and the row of the pricing table.
model = rag_config["llm"]["model"]

# Map the model name to the tiktoken encoding used to count its tokens.
if model in ["gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"]:
    encoding = "o200k_base"
elif model in ["gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4-turbo-preview", "gpt-4-0125-preview", "gpt-4-1106-preview", "gpt-4",
               "gpt-4-0613", "gpt-4-0314", "gpt-3.5-turbo-0125", "gpt-3.5-turbo", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-instruct"]:
    encoding = "cl100k_base"
else:
    # Fail with a clear message instead of a NameError on `encoding` (or a
    # stale value left over from an earlier run of this cell).
    raise ValueError(f"No tokenizer encoding configured for model {model!r}")

tokenizer = tiktoken.get_encoding(encoding)

def get_cost(tokenizer, input: str, output: str, pricing: Dict[str, Dict[str, float]], model: str) -> float:
    """Estimate the cost of one request from its input and output text.

    Args:
        tokenizer: object whose ``encode(text)`` returns a sequence of tokens.
        input: prompt text sent to the model.
        output: text produced by the model.
        pricing: mapping ``model -> {"input": rate, "output": rate}`` with
            rates expressed per 1,000,000 tokens.
        model: key of ``pricing`` to use.

    Returns:
        Input cost plus output cost, in the currency unit of ``pricing``.

    Raises:
        KeyError: if ``model`` is not in ``pricing``.
    """
    n_input_toks = len(tokenizer.encode(input))
    n_output_toks = len(tokenizer.encode(output))
    input_cost = n_input_toks * pricing[model]["input"] / 1_000_000
    # Output is billed on the output token count (this used the input count).
    output_cost = n_output_toks * pricing[model]["output"] / 1_000_000

    return input_cost + output_cost

### Get traces

In [None]:
# Traces summarised in the table below.
# NOTE(review): presumably only one page of results is returned - confirm
# whether pagination is needed to cover all traces.
traces = langfuse.fetch_traces().data

In [None]:
# Inspect the output of the last fetched trace.
traces[-1].output

In [None]:
# Build one summary record per trace: id, timing, estimated cost and text.
trace_data = []

for trace in traces:

    # assumes trace.input is shaped {"kwargs": {"request": {"query": ...}}} - TODO confirm
    _input = trace.input["kwargs"]["request"]["query"]
    # Join the output pieces into one string; traces without output get "".
    output = "".join(trace.output) if trace.output else ""
    trace_data.append(
        {
            "id": trace.id,
            "timestamp": trace.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            "latency": trace.latency,
            "cost": get_cost(
                tokenizer=tokenizer,
                # get_cost's parameter is named `input`; passing `_input=`
                # raised "unexpected keyword argument".
                input=_input,
                output=output,
                pricing=pricing,
                model=model,
            ),
            "input": _input,
            "output": output
        }
    )

trace_data_df = pd.DataFrame(trace_data)
trace_data_df

In [None]:
# Total estimated cost over all rows of the trace table.
trace_data_df.cost.sum()

In [None]:
# Summary statistics for the trace table.
trace_data_df.describe()

### Observations

In [None]:
# Observations recorded under the name "retrieve", joined to the trace table below.
observations = langfuse.fetch_observations(name="retrieve")

In [None]:
# Look up each trace's "retrieve" output by trace id (a later observation for
# the same trace replaces an earlier one) and attach it as a new column.
obs = dict(
    (observation.trace_id, observation.output)
    for observation in observations.data
)
trace_data_df["retrieval"] = trace_data_df["id"].map(obs)

In [None]:
# Display the trace table, now including the "retrieval" column.
trace_data_df