In [None]:
# Basic imports for loading PDFs, splitting text, creating embeddings, and storing vectors

from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma


In [2]:
# Basic imports for environment variables, LLM, prompts, and local model manager

import os
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from foundry_local import FoundryLocalManager


In [3]:
# Embedding model served through HuggingFace
model_name = "BAAI/bge-small-en-v1.5"

# Encoding options forwarded to the embedding model
encode_kwargs = dict(normalize_embeddings=True)

# Build the embeddings object.
# Optional: to select a device, also pass model_kwargs={'device': 'cuda'}.
embeddings = HuggingFaceEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)


In [None]:
# One loader per PDF to ingest
loaders = [PyPDFLoader(r"Visa The rise of Agentic Commerce.pdf")]

# Collect everything each loader returns into a single flat list
documents = []
for loader in loaders:
    documents += loader.load()

In [5]:
# Split documents into smaller, overlapping text chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents=documents)

# Give every chunk a stable id based on its position in `texts`.
# Without explicit ids each call stores the chunks under fresh random ids, so
# re-running this cell against the persisted directory would append a second
# copy of every chunk and the retriever would start returning duplicates.
# With stable ids a re-run overwrites the existing entries instead.
# NOTE: chunks written by earlier runs (random ids) are not removed; delete
# "stores/data_cosine" once to start from a clean collection. If the source
# later yields fewer chunks, higher-numbered stale entries also remain.
chunk_ids = [f"chunk-{index}" for index in range(len(texts))]

# Create and store embeddings in Chroma (cosine distance, persisted on disk)
vector_store = Chroma.from_documents(
    texts,
    embeddings,
    ids=chunk_ids,
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory="stores/data_cosine"
)

print("*" * 100)
print("Chroma Vector Store Created:", vector_store)
print("*" * 100)


****************************************************************************************************
Chroma Vector Store Created: <langchain_chroma.vectorstores.Chroma object at 0x00000119398FB8E0>
****************************************************************************************************


In [6]:
# Retriever over the vector store; each query returns the 2 best-matching chunks
retriever = vector_store.as_retriever(search_kwargs=dict(k=2))


In [7]:
# Alias of the local model to use
alias = "qwen2.5-0.5b"
# Create the local model manager for that alias. Its get_model_info(),
# endpoint and api_key are read when the chat model is configured.
# NOTE(review): presumably constructing the manager also starts the local
# service and loads the model — confirm against the Foundry Local SDK docs.
manager = FoundryLocalManager(alias)


In [8]:
# Resolve the concrete model id behind the alias, then point the
# OpenAI-compatible chat client at the endpoint exposed by the manager.
model_id = manager.get_model_info(alias).id
llm = ChatOpenAI(
    model=model_id,
    base_url=manager.endpoint,
    api_key=manager.api_key,
    temperature=0.3,
    streaming=False,
)


In [9]:
# Print the LLM configuration (its repr) to check the resolved model name,
# endpoint and temperature before sending any question.
print(llm)


profile={} client=<openai.resources.chat.completions.completions.Completions object at 0x00000119398E5E70> async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000119398E5FF0> root_client=<openai.OpenAI object at 0x00000119398E5FC0> root_async_client=<openai.AsyncOpenAI object at 0x000001193A35BAF0> model_name='qwen2.5-0.5b-instruct-cuda-gpu:4' temperature=0.3 model_kwargs={} openai_api_key=SecretStr('**********') openai_api_base='http://127.0.0.1:61031/v1'


In [10]:
from langchain_core.prompts import ChatPromptTemplate

# Create the RAG function
def ask_question(question: str, retriever, llm):
    """Answer a question from retrieved document chunks (simple RAG).

    Args:
        question: The user's question; it is used both as the retrieval query
            and as the question shown to the model.
        retriever: Object whose ``invoke(question)`` returns an iterable of
            documents exposing a ``page_content`` attribute.
        llm: Chat model that can be composed with the prompt via ``prompt | llm``.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Prompt template
    prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            """Use the following pieces of document to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Only return the helpful answer below and nothing else."""
        ),
        ("human", "Document: {document}\nQuestion: {question}\n\nHelpful answer:")
    ])

    # Combine prompt with LLM
    rag_chain = prompt | llm

    # Retrieve documents
    docs = retriever.invoke(question)

    # Feed the model the chunk text only. Passing the document list itself
    # would format it with its Python repr (object wrappers, metadata, escaped
    # newlines), which wastes context and confuses small models.
    context = "\n\n".join(doc.page_content for doc in docs)

    # Run chain
    response = rag_chain.invoke({"document": context, "question": question})

    # Return only content
    return response.content



In [11]:
# Example query against the indexed PDF
question = "What defines an AI Agent?"
answer = ask_question(question, retriever, llm)
print(answer)


An AI Agent is defined by its ability to perform tasks autonomously or semi-autonomously using artificial intelligence models to make decisions following reasoning frameworks and leveraging tools to go beyond their training data.
