## Raga Tracing Sample Notebook

In [1]:
# Uncomment to install the dependencies into the running kernel's environment:
# %pip install langchain-openai langchain-chroma langchain-community pypdf ragaai-catalyst

In [1]:
import os
from dotenv import load_dotenv
from ragaai_catalyst import RagaAICatalyst, Experiment, Dataset, Tracer, Evaluation

# Load variables from a local .env file into the process environment.
load_dotenv()

# Credentials are read from the environment; each is None when the variable is unset.
# OPENAI_API_KEY is not referenced again in the visible cells.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
RAGA_ACCESS_KEY, RAGA_SECRET_KEY = (
    os.environ.get(var_name) for var_name in ("RAGA_ACCESS_KEY", "RAGA_SECRET_KEY")
)

# Client handle for the RagaAI Catalyst API.
catalyst = RagaAICatalyst(
    base_url="https://catalyst.raga.ai/api",
    access_key=RAGA_ACCESS_KEY,
    secret_key=RAGA_SECRET_KEY,
)

# catalyst.project_use_cases()
# Last expression of the cell, so the project list is displayed as the cell output.
catalyst.list_projects()

  from .autonotebook import tqdm as notebook_tqdm


Token(s) set successfully


['BotTest', 'BrokerBot', 'ForgeFAQ']

In [2]:
# Resources:
# - https://docs.raga.ai/ragaai-catalyst-1/concepts/uploading-data/logging-traces
# - https://colab.research.google.com/drive/1-Os-m_DTSnvpUhvqNoGnuyOOII7ni9YT?usp=sharing
# - https://colab.research.google.com/drive/1XzhHQAAoNxrOUmBXyCj_N3Oe-Q_6HJd3?usp=sharing#scrollTo=3DGbYUlNE40A

# Configure the tracer that records the LangChain run below.
# The `pipeline` values are descriptive metadata only, so they are kept in sync
# with what this notebook actually runs: ChatOpenAI(model="gpt-4o-mini") and a
# Chroma vector store. Previously they named gpt-3.5-turbo and faiss, which
# mislabeled the recorded traces.
tracer = Tracer(
    dataset_name="test_dataset", # required to name dataset upon creation
    project_name="BotTest",
    tracer_type="langchain", #llama_index not available until beta release on 11/11
    pipeline={
        "llm_model": "gpt-4o-mini", # metadata; matches the model used in generate_response
        "vector_store": "chroma", # matches the store built in load_document
        # NOTE(review): OpenAIEmbeddings() is created without an explicit model;
        # confirm its default matches this label.
        "embed_model": "text-embedding-ada-002",
    },
    #add your metadata as "key":"value" pairs
    metadata={"use-case": "YourUseCase", "stage": "testing-stage"}
)

# Last expression of the cell: starts tracing and displays the returned object.
tracer.start()

Tracer started for project: BotTest


<ragaai_catalyst.tracers.tracer.Tracer at 0x154629490>

In [3]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
# from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from opentelemetry.trace import SpanKind
# PDF that the question-answering cells operate on (relative path).
source_doc_path = "inputs/data.pdf"

# Module-level placeholders: no retriever has been built and no document
# has been loaded yet.
retriever = loaded_doc = None
def load_document(source_doc_path):
    """Build a retriever over the PDF at ``source_doc_path``.

    The PDF is read and split with ``PyPDFLoader.load_and_split``, the
    resulting pages are indexed in a Chroma store using ``OpenAIEmbeddings``,
    and the store is exposed as a retriever configured with
    ``search_kwargs={"k": 5}``.

    Args:
        source_doc_path: Path to the PDF file to index.

    Returns:
        The retriever on success, or ``None`` if any step raised (the error
        is printed rather than propagated).
    """
    try:
        pages = PyPDFLoader(source_doc_path).load_and_split()
        store = Chroma.from_documents(pages, OpenAIEmbeddings())
        doc_retriever = store.as_retriever(search_kwargs={"k": 5})
        print("Document loaded and processed.")
        return doc_retriever
    except Exception as e:
        # Best-effort: report the failure and let the caller handle ``None``.
        print(f"An error occurred while loading the document: {e}")
        return None

def generate_response(retriever, query):
    """Answer ``query`` using documents fetched by ``retriever``.

    Builds a retrieval chain (``create_retrieval_chain`` wrapping a
    ``create_stuff_documents_chain``) around ``ChatOpenAI(model="gpt-4o-mini")``
    and a fixed prompt template, invokes it with ``{"input": query}``, then
    prints and returns the ``"answer"`` entry of the result.

    Args:
        retriever: Retriever passed to ``create_retrieval_chain``.
        query: The user's question.

    Returns:
        The answer, or ``None`` if any step raised (the error is printed
        rather than propagated).
    """
    try:
        # llm = ChatGoogleGenerativeAI(model="gemini-pro")
        chat_model = ChatOpenAI(model="gpt-4o-mini")
        prompt_text = """
            You are a helpful AI assistant. Answer based on the context provided.
            context: {context}
            input: {input}
            answer:
            """
        qa_prompt = PromptTemplate.from_template(prompt_text)
        rag_chain = create_retrieval_chain(
            retriever,
            create_stuff_documents_chain(chat_model, qa_prompt),
        )
        result = rag_chain.invoke({"input": query})
        print(result["answer"])
        return result["answer"]
    except Exception as e:
        # Best-effort: report the failure and signal it with ``None``.
        print(f"An error occurred while generating the response: {e}")
        return None

# Retrievers built so far, keyed by document path, so that a later call for an
# already-loaded document can reuse the index instead of rebuilding it.
_RETRIEVER_CACHE = {}


def process_document(source_doc_path, loaded_doc, query):
    """Answer ``query`` against a PDF, reusing a cached retriever when possible.

    When ``loaded_doc`` equals ``source_doc_path`` and a retriever for that
    path has already been built by a previous call, that retriever is reused.
    Otherwise the document is (re)loaded via ``load_document`` and the new
    retriever is stored for later calls.

    The earlier implementation left its local ``retriever`` unassigned on the
    "cached" branch, so that branch always ended in the generic error message,
    and its ``loaded_doc`` update was local-only, so nothing was ever cached.

    Args:
        source_doc_path: Path of the PDF to query.
        loaded_doc: Path the caller believes is currently loaded, or ``None``.
            A value different from ``source_doc_path`` forces a reload.
        query: The question to answer.

    Returns:
        The value returned by ``generate_response``, or the string
        ``"Failed to load document."`` if loading failed, or
        ``"An error occurred during the document processing."`` if an
        unexpected exception was raised.
    """
    try:
        cached_retriever = _RETRIEVER_CACHE.get(source_doc_path)
        if loaded_doc == source_doc_path and cached_retriever is not None:
            print("Using cached document retriever.")
            retriever = cached_retriever
        else:
            # Either the caller asked for a different document or nothing is
            # cached for this path yet: build the retriever and remember it.
            retriever = load_document(source_doc_path)
            if retriever is None:
                return "Failed to load document."
            _RETRIEVER_CACHE[source_doc_path] = retriever
        return generate_response(retriever, query)
    except Exception as e:
        print(f"An overall error occurred: {e}")
        return "An error occurred during the document processing."

In [4]:
# Question to ask about the PDF.
query = "what is the title of the paper."

# Run the load-and-answer pipeline; whatever process_document returns
# (an answer or an error message) is kept in `response`.
response = process_document(
    source_doc_path=source_doc_path,
    loaded_doc=loaded_doc,
    query=query,
)

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Document loaded and processed.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


The title of the paper is not explicitly provided in the given context. However, it is published in the "Journal of the Brazilian Society of Mechanical Sciences and Engineering" in 2023. If you need the specific title, please refer to the journal or article directly.


In [5]:
# Stop the tracer started earlier; per the recorded cell output this also
# initiates the trace upload, and the returned message is displayed.
tracer.stop()

Stopping tracer and initiating trace upload...
Tracer provider shut down successfully


Uploading traces: 0it [00:00, ?it/s]

Uploading traces...


Uploading traces: 1it [00:00,  2.81it/s]


'Trace upload initiated. Use get_upload_status() to check the status.'