In [1]:
import os
from getpass import getpass

# Never hard-code the key in the notebook: reuse GROQ_API_KEY when the
# environment already provides it, otherwise prompt for it without echoing
# so the secret is not saved in the notebook source or its outputs.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")


#### This cell sets up the Groq API key.

In [2]:
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

print(" Imports working perfectly")


 Imports working perfectly


#### This cell imports all required libraries for data processing, vector search, and chatbot construction.

In [4]:
import pandas as pd

# Load the two CSV files from the notebook's working directory into
# DataFrames; later cells build the text corpus and the counts from them.
holdings_df = pd.read_csv("holdings.csv")
trades_df = pd.read_csv("trades.csv")

# The last expression is a tuple of the two previews, so the cell output shows
# the first rows of both frames together.
holdings_df.head(), trades_df.head()


(   AsOfDate  OpenDate CloseDate ShortName PortfolioName StrategyRefShortName  \
 0  01/08/23  04/03/20       NaN  Garfield      Garfield              Default   
 1  01/08/23  04/03/20       NaN  Garfield      Garfield              Default   
 2  01/08/23  04/03/20       NaN  Garfield      Garfield              Default   
 3  01/08/23  04/03/20       NaN  Garfield      Garfield              Default   
 4  01/08/23  04/03/20       NaN   Heather       Heather              Default   
 
   Strategy1RefShortName Strategy2RefShortName CustodianName DirectionName  \
 0                 Asset             DefaultS2    Well Prime          Long   
 1                 Asset             DefaultS2    Well Prime          Long   
 2                 Asset             DefaultS2    Well Prime          Long   
 3                 Asset             DefaultS2    Well Prime          Long   
 4                 Asset             DefaultS2    Well Prime          Long   
 
    ...  StartPrice Price StartFXRate  FXR

#### This cell loads the provided holdings and trades CSV files into Pandas DataFrames, which act as the chatbot’s only data sources.

In [4]:
def df_to_text(df, name):
    """Serialize a DataFrame into plain text, one line per row.

    The result starts with a "<name> DATA:" header line, followed by one line
    per row of the form "col1: value1, col2: value2, ...". Every line,
    including the header, ends with a newline.

    Args:
        df: The DataFrame to serialize.
        name: Label placed in the header line (e.g. "HOLDINGS").

    Returns:
        The serialized text as a single string; just the header line when
        `df` has no rows.
    """
    columns = list(df.columns)
    lines = [f"{name} DATA:"]
    # itertuples keeps each column's own dtype. iterrows would first pack the
    # row into a single Series, which upcasts integers to floats (1 -> 1.0)
    # whenever the row mixes integer and float columns.
    for values in df.itertuples(index=False, name=None):
        lines.append(", ".join(f"{col}: {val}" for col, val in zip(columns, values)))
    # Join once at the end instead of growing a string inside the loop.
    return "\n".join(lines) + "\n"

# Serialize each frame under its own header label.
holdings_text = df_to_text(df=holdings_df, name="HOLDINGS")
trades_text = df_to_text(df=trades_df, name="TRADES")

# Single corpus for retrieval: holdings first, then trades, separated by a newline.
combined_text = "\n".join([holdings_text, trades_text])


This cell converts the structured CSV data into a readable text format so it can be processed by the language model, and merges the holdings and trades text into a single knowledge base for unified retrieval.

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking configuration for the retrieval corpus.
# NOTE(review): chunk_size / chunk_overlap are presumably measured in
# characters (the splitter's default length function) — confirm against the
# installed langchain version.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50
)

# The whole corpus is passed as a single text, so holdings and trades rows are
# chunked together and no per-source metadata is attached to the documents.
documents = splitter.create_documents([combined_text])


This cell splits the large text into smaller overlapping chunks to improve retrieval accuracy and avoid token limits.

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model used to vectorize the text chunks.
# NOTE(review): the model is presumably downloaded on first use — confirm
# network access / local cache before running on a fresh machine.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Embed every chunk in `documents` and build the FAISS index used by the retriever.
vectorstore = FAISS.from_documents(documents, embeddings)


This cell converts the text chunks into semantic embeddings using a HuggingFace sentence-transformer model and stores the embeddings in a FAISS vector database to enable fast semantic search.

In [7]:
from langchain_groq import ChatGroq

# Chat model served by Groq; temperature=0 is used to keep answers as
# repeatable as possible.
# NOTE(review): presumably authenticates with the GROQ_API_KEY environment
# variable set in the first cell — confirm.
llm = ChatGroq(
    model="llama-3.1-8b-instant",   
    temperature=0
)


This cell initializes the Groq-hosted LLaMA model with temperature 0 for deterministic, reliable responses.

In [8]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Prompt for the QA chain: {context} receives the retrieved chunks and
# {question} the user's question. The model is told to answer only from the
# context and to reply with a fixed fallback sentence otherwise; `ask_bot`
# relies on that fallback containing the word "sorry".
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a financial data assistant.
Answer the question ONLY using the provided context.
Give a clear and concise final answer.
Do NOT explain your reasoning.
If the answer is not present, reply exactly:
"Sorry can not find the answer"

Context:
{context}

Question:
{question}

Final Answer:
"""
)

# Retrieval-augmented QA chain: the retriever pulls chunks from the FAISS
# index, the "stuff" chain type places them into the prompt's {context}, and
# the LLM produces the answer.
# NOTE(review): as_retriever() is called with default settings, so only a
# handful of 500-character chunks reach the model per question — aggregate
# questions over the whole dataset cannot be answered from that context.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt}
)


This cell defines a strict prompt that instructs the model to answer only from the provided context and to return a fixed fallback if the answer is missing.

In [9]:
def can_answer_from_data(question):
    """Return True when the question contains any dataset-related keyword.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside a longer word.
    """
    normalized = question.lower()
    for keyword in (
        "how many", "count", "total", "list",
        "which", "what", "portfolio", "holding",
        "trade", "custodian", "strategy", "counterparty",
    ):
        if keyword in normalized:
            return True
    return False


The `RetrievalQA` chain built above connects the language model, retriever, and prompt into a Retrieval-Augmented chatbot pipeline.

In [10]:
import re


def _named_portfolios(names, q):
    """Return the entries of `names` that occur as whole words in the lower-cased question `q`."""
    found = []
    for name in names:
        label = str(name).strip().lower()
        if not label:
            continue
        # Look-arounds instead of a plain substring test, so that e.g.
        # "CoYold 1" is not reported for a question about "CoYold 11".
        if re.search(rf"(?<!\w){re.escape(label)}(?!\w)", q):
            found.append(name)
    return found


def _portfolio_counts(df, q, wants_count):
    """Answer a row-count question about `df` with Pandas, or return None if not applicable.

    Rows are counted per "PortfolioName". When the question names portfolios
    that exist in `df` (and either mentions "portfolio" or asks for a count),
    only those portfolios are reported; when it just mentions "portfolio",
    every portfolio is listed.
    """
    # Explicit schema check instead of swallowing every exception.
    if "PortfolioName" not in df.columns:
        return None
    counts = df.groupby("PortfolioName").size()
    if counts.empty:
        return None

    named = _named_portfolios(counts.index, q)
    if named and ("portfolio" in q or wants_count):
        counts = counts.loc[named]
    elif "portfolio" not in q:
        return None
    return "\n".join(f"{k}: {v}" for k, v in counts.items())


def ask_bot(question):
    """Answer a question about the holdings/trades data.

    Resolution order:
      1. Pandas: exact per-portfolio row counts for questions about holdings
         or trades (holdings are checked first).
      2. RAG: the `qa` chain, but only when `can_answer_from_data` accepts
         the question and the model did not reply with its "sorry" fallback.
      3. The fixed fallback sentence.

    Args:
        question: The user's question as a string.

    Returns:
        The answer text, or "Sorry can not find the answer".
    """
    fallback = "Sorry can not find the answer"
    if not isinstance(question, str) or not question.strip():
        return fallback

    q = question.lower()
    # Word boundaries keep "count" from matching inside "counterparty".
    wants_count = re.search(r"\b(how many|count|number of)\b", q) is not None

    # TRY PANDAS FIRST: deterministic counts computed from the full data.
    for keyword, df in (("holding", holdings_df), ("trade", trades_df)):
        if keyword in q:
            answer = _portfolio_counts(df, q, wants_count)
            if answer is not None:
                return answer

    # TRY RAG SECOND
    if can_answer_from_data(question):
        result = qa.invoke({"query": question})
        answer = result["result"]
        # Any reply containing "sorry" is treated as the model's fallback.
        if answer and "sorry" not in answer.lower():
            return answer.strip()

    # FINAL FALLBACK
    return fallback


The `can_answer_from_data` check identifies whether a question is related to the dataset before `ask_bot` sends it to the chatbot.

In [11]:
# Sample questions used as a smoke test: per-portfolio counts, lookups,
# aggregate questions, and one out-of-scope question (the last) that should
# produce the fallback answer.
questions = [
    "How many holdings does each PortfolioName have?",
    "How many holdings are there in UNC Investment Fund?",
    "Which CustodianName is associated with each PortfolioName?",
    "What Counterparty is used by each PortfolioName?",
    "What StrategyName is used in each PortfolioName?",
    "What is the total number of trades?",
    "How many trades are there for each PortfolioName?",
    "Which PortfolioName performed better based on Profit and Loss?",
    "What is the total Profit and Loss for each PortfolioName?",
    "Who is the CEO of Apple?"
]

# The question is printed before ask_bot runs, so progress stays visible while
# each answer is being produced.
for q in questions:
    print(f"Q: {q}")
    print(f"A: {ask_bot(q)}")
    print("-" * 50)


Q: How many holdings does each PortfolioName have?
A: CoYold 1: 7
CoYold 11: 3
CoYold 7: 1
Garfield: 221
Heather: 195
Hi Yield: 19
IG Corp: 1
MNC Investment Fund: 243
NPSMF1: 17
NPSMF2: 17
NPSMF3: 17
Northpoint 401K: 14
Opium Holdings Partners: 131
Platpot: 61
SMA-L1: 3
SMA-L2: 3
SMA-L4: 3
Warren Lee IG: 15
Ytum: 51
--------------------------------------------------
Q: How many holdings are there in UNC Investment Fund?
A: Sorry can not find the answer
--------------------------------------------------
Q: Which CustodianName is associated with each PortfolioName?
A: Account A - Custodian 1
Account A - Custodian 1
Account A - Custodian 1
Account A - Custodian 1
--------------------------------------------------
Q: What Counterparty is used by each PortfolioName?
A: PortfolioName: ClientA, Counterparty: Internal
PortfolioName: Account A, Counterparty: ABGS
PortfolioName: Account A, Counterparty: ABGS
PortfolioName: Account A, Counterparty: ABGS
-------------------------------------------

`ask_bot` handles user questions by using Pandas for accurate calculations and RAG for descriptive answers, with a safe fallback. This cell validates the chatbot using sample questions, including both valid data queries and out-of-scope queries.

In [12]:
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output

# UI Elements
# Single-line text input for the user's question.
question_box = widgets.Text(
    placeholder="Ask a question about holdings or trades...",
    description="Question:",
    layout=widgets.Layout(width="85%")
)

# Button that triggers the chatbot; its click handler is attached below.
ask_button = widgets.Button(
    description="Ask",
    button_style="success"
)

# Output area in which the click handler renders status messages and answers.
output = widgets.Output()


def on_ask_clicked(b):
    """Handle a click on the Ask button: run the chatbot and render its answer.

    Reads the question from `question_box`, shows a temporary "Thinking..."
    message, and replaces it with the answer (or an error message) inside the
    `output` widget.

    Args:
        b: The clicked button, as passed by ipywidgets; unused.
    """
    with output:
        clear_output()

        question = question_box.value.strip()
        if not question:
            display(Markdown("**Please enter a question**"))
            return

        display(Markdown("**Thinking...**"))

        try:
            answer = ask_bot(question)
        except Exception as e:
            # Show the failure in the UI instead of losing it in the callback.
            clear_output()
            display(Markdown(f" **Error:** {e}"))
            return

        clear_output()
        # Markdown folds single newlines into one paragraph, so multi-line
        # answers (e.g. one "name: count" per line) need explicit hard breaks
        # ("two spaces + newline") to keep one entry per line.
        rendered = str(answer).replace("\n", "  \n")
        display(Markdown(f"### Answer\n\n{rendered}"))


# Register the click handler on the button.
ask_button.on_click(on_ask_clicked)

# Render the input box, the button, and the answer area below the cell.
display(question_box, ask_button, output)


Text(value='', description='Question:', layout=Layout(width='85%'), placeholder='Ask a question about holdings…

Button(button_style='success', description='Ask', style=ButtonStyle())

Output()

This cell adds an interactive UI using ipywidgets so that users can ask questions easily inside the notebook.