# Library

In [21]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
import os

from dotenv import load_dotenv
load_dotenv()


True

# Get Embedding Model

In [22]:
# API key for the LLM provider, read from the "api_key" environment variable
# (load_dotenv() ran in the imports cell). Evaluates to None when unset.
api_key = os.environ.get("api_key")

In [23]:
from embedded_utils import get_embedding_model
# Embedding function later handed to the Chroma vector store.
embedding_model = get_embedding_model()

# Chat model reached through the OpenRouter endpoint, authenticated with api_key.
model = "qwen/qwen3-8b:free"
llm = ChatOpenAI(
    base_url="https://openrouter.ai/api/v1",
    openai_api_key=api_key,
    model=model,
    # Optional request headers, currently disabled:
    # default_headers={
    #   "HTTP-Referer": "http://localhost:8501", # Replace with your actual site URL
    #   "X-Title": "My RAG App", # Replace with your actual site name
    # }
)
print(f"LLM configured to use OpenRouter with model: {model}")

INFO: Initializing HuggingFaceEmbeddings model: all-MiniLM-L6-v2
LLM configured to use OpenRouter with model: qwen/qwen3-8b:free


In [24]:
# Display name -> category code for the quantitative-finance subject areas.
# NOTE(review): the codes look like arXiv "q-fin" category identifiers - confirm.
q_fin_dict = dict(
    [
        ("Risk Management", "q-fin.RM"),
        ("Computational Finance", "q-fin.CP"),
        ("Statistical Finance", "q-fin.ST"),
        ("Trading and Market Microstructure", "q-fin.TR"),
        ("Economics", "q-fin.EC"),
        ("General Finance", "q-fin.GN"),
        ("Mathematical Finance", "q-fin.MF"),
        ("Portfolio Management", "q-fin.PM"),
        ("Pricing of Securities", "q-fin.PR"),
    ]
)

# Load Vector Store

In [25]:
# Root folder of the persisted vector stores; a relative path, so it is
# resolved against the notebook's current working directory.
root_db_path = "../vectorDB"

In [26]:
# Location of the persisted store below root_db_path. The sub-path and the
# final directory get separate names so re-reading the cell is unambiguous.
store_subdir = 'base/PrinciplesofFinance-WEB.pdf'
persist_directory = root_db_path + "/" + store_subdir

print(persist_directory)
print(f"Loading vector store from: {persist_directory}")

# Fail early on a wrong path: Chroma can initialise a brand-new, empty store at
# a directory that does not exist yet, which would otherwise look like a
# successful load and only show up later as empty retrieval results.
if not os.path.isdir(persist_directory):
    raise FileNotFoundError(
        f"Vector store directory not found: {persist_directory}"
    )

try:
    vectorstore = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding_model
    )
    print("Vector store loaded successfully.")
    # NOTE: `_collection` is a private attribute of the Chroma wrapper; it is
    # used here only as a sanity check and may change between library versions.
    doc_count = vectorstore._collection.count()
    print(f"Number of documents in store: {doc_count}")
    if doc_count == 0:
        print("Warning: the vector store is empty; retrieval will return no documents.")
except Exception as e:
    print(f"Error loading vector store: {e}")
    # Re-raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down, losing all state and hiding the traceback.
    raise

../vectorDB/base/PrinciplesofFinance-WEB.pdf
Loading vector store from: ../vectorDB/base/PrinciplesofFinance-WEB.pdf
Vector store loaded successfully.
Number of documents in store: 2137


# Retriever

In [27]:
# Similarity-search retriever over the loaded store; k=3 results per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

print("Retriever created successfully.")

Retriever created successfully.


# Prompt Template

In [28]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders, {context} and {question}. It is written as
# adjacent string literals, one per prompt line, so every newline (including the
# leading and trailing one) is explicit.
template = (
    "\n"
    "You are an expert assistant for answering questions.\n"
    "Use only the following context to answer the question at the end.\n"
    "If you don't know the answer from the context, just say that you don't know.\n"
    "Do not make up an answer.\n"
    "\n"
    "CONTEXT:\n"
    "{context}\n"
    "\n"
    "QUESTION:\n"
    "{question}\n"
    "\n"
    "ANSWER:\n"
)

prompt = ChatPromptTemplate.from_template(template)

# RAG Chain

In [29]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# --- 5. Build the RAG Chain using LCEL ---

def format_docs(docs):
    """Concatenate the text of the given documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values, in order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    texts = [document.page_content for document in docs]
    return "\n\n".join(texts)

# Inputs for the prompt: "context" is the retriever output passed through
# format_docs, "question" is the chain input forwarded unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build prompt inputs -> fill the prompt -> call the LLM -> plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

print("RAG chain created successfully. Ready to answer questions.")

RAG chain created successfully. Ready to answer questions.


# Chain runner

In [30]:

# --- 6. Run the Chain ---
# Interactive loop: read a question, run it through the retriever and print the
# retrieved documents. Type 'exit' (any casing, surrounding spaces ignored) to stop.
if __name__ == "__main__":
    while True:
        try:
            question = input("Ask a question (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the cell was interrupted: leave the loop
            # cleanly instead of surfacing a traceback.
            break

        if question.lower() == 'exit':
            break
        if not question:
            # Nothing to search for; ask again rather than querying with "".
            continue

        # Retrieval-only run: shows what the retriever returns for the question.
        # For a generated answer, call rag_chain.invoke(question) instead.
        retrieved_docs = retriever.invoke(question)

        print("\n--- Answer ---\n")
        if not retrieved_docs:
            print("No documents retrieved.")
        for i, doc in enumerate(retrieved_docs, start=1):
            print(f"\n--- Document {i} ---")
            print(doc.page_content)
            # Per-document metadata replaces the former raw dump of the whole list.
            print(f"metadata: {doc.metadata}")
        print("\n--------------\n")