### Loading Documents

In [1]:
from langchain_community.document_loaders import Docx2txtLoader
import os

DATA_PATH = "D:/My Projects/GenAI Document Intelligence (RAG)"
documents = []

for file in os.listdir(DATA_PATH):
    if file.endswith(".docx"):
        loader = Docx2txtLoader(os.path.join(DATA_PATH, file))
        docs = loader.load()
        for d in docs:
            d.metadata["source"] = file
        documents.extend(docs)

print(f"Total pages loaded: {len(documents)}")


Total pages loaded: 3


### Text Chunking (Critical for Financial Docs)

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=150
)

chunks = text_splitter.split_documents(documents)
print(f"Total chunks created: {len(chunks)}")


Total chunks created: 1247


### Load Hugging Face Embeddings (Offline)

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)

print("HuggingFace embeddings loaded")


HuggingFace embeddings loaded


### Create FAISS Vector Store

In [7]:
from langchain_community.vectorstores import FAISS

vector_db = FAISS.from_documents(chunks, embeddings)
vector_db.save_local("faiss_index")

print("FAISS vector store created successfully")


FAISS vector store created successfully


### Load Vector DB & Create Retriever

In [8]:
vector_db = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)


In [9]:
retriever = vector_db.as_retriever(
    search_kwargs={"k": 4}
)

print("Retriever ready")


Retriever ready


### Add a Local / Free LLM

In [10]:
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline

pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_length=512
)

llm = HuggingFacePipeline(pipeline=pipe)


Device set to use cpu
  llm = HuggingFacePipeline(pipeline=pipe)


### Build Retrieval-Augmented Generation (RAG) Chain

In [14]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


In [15]:
prompt = ChatPromptTemplate.from_template(
    """
    Answer the question using ONLY the context below.
    If the answer is not in the context, say "I don't know".

    Context:
    {context}

    Question:
    {question}
    """
)


In [16]:
rag_chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)

print("Runnable-based RAG pipeline ready")


Runnable-based RAG pipeline ready


### Ask Financial Questions (Inference)

In [45]:
questions = [
    "how does the company profited in 2022?"
]

for q in questions:
    answer = rag_chain.invoke(q)
    print("\nQ:", q)
    print("A:", answer)


Token indices sequence length is longer than the specified maximum sequence length for this model (3473 > 512). Running this sequence through the model will result in indexing errors



Q: how does the company profited in 2022?
A: We reported $198 billion in revenue and $83 billion in operating income. And the Microsoft Cloud surpassed $100 billion in annualized revenue for the first time.


In [22]:
questions = [
    "What risks are mentioned in the FY22 10-K report?",
    "What message did leadership emphasize in the shareholder letter?",
    "How did cloud services perform in FY2022?",
    "What were the major operating expenses?"
]

for q in questions:
    answer = rag_chain.invoke(q)
    print("\nQ:", q)
    print("A:", answer)



Q: What risks are mentioned in the FY22 10-K report?
A: Quantitative and Qualitative Disclosures about Market Risk

Q: What message did leadership emphasize in the shareholder letter?
A: The importance of the effective engagement and action on environmental, social, and governance topics. To meet the expectations of our stakeholders and to and maintain their trust, we are committed to conducting our business in ways that are principled, transparent, and accountable and we have made a broad range of environmental and social commitments.

Q: How did cloud services perform in FY2022?
A: Whether we succeed in cloud-based services depends on our execution in several areas, including: nn Continuing to bring to market compelling cloud-based experiences that generate increasing traffic and market share. nnMaintaining the utility, compatibility, and performance of our cloud-based services on the growing array of computing devices, including PCs, smartphones, tablets, gaming consoles, and other

### GRADIO APP

In [47]:
import os
import gradio as gr

from langchain_community.document_loaders import Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline


# =====================================================
# CONFIG
# =====================================================
DATA_PATH = "D:/My Projects/GenAI Document Intelligence (RAG)"
FAISS_PATH = "faiss_index"


# =====================================================
# Load Documents (same as notebook)
# =====================================================
documents = []

for file in os.listdir(DATA_PATH):
    if file.endswith(".docx"):
        loader = Docx2txtLoader(os.path.join(DATA_PATH, file))
        docs = loader.load()
        for d in docs:
            d.metadata["source"] = file
        documents.extend(docs)

if not documents:
    raise RuntimeError("No DOCX files found in DATA_PATH")

# =====================================================
# Chunking
# =====================================================
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=150
)
chunks = text_splitter.split_documents(documents)


# =====================================================
# Embeddings
# =====================================================
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)


# =====================================================
# Vector Store
# =====================================================
if os.path.exists(FAISS_PATH):
    vector_db = FAISS.load_local(
        FAISS_PATH,
        embeddings,
        allow_dangerous_deserialization=True
    )
else:
    vector_db = FAISS.from_documents(chunks, embeddings)
    vector_db.save_local(FAISS_PATH)

retriever = vector_db.as_retriever(search_kwargs={"k": 4})


# =====================================================
# LLM (Offline)
# =====================================================
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_length=512
)
llm = HuggingFacePipeline(pipeline=pipe)


# =====================================================
# Prompt
# =====================================================
prompt = ChatPromptTemplate.from_template(
    """
    Answer the question using ONLY the context below.
    If the answer is not in the context, say "I don't know".

    Context:
    {context}

    Question:
    {question}
    """
)


# =====================================================
# RAG Chain (Runnable)
# =====================================================
rag_chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)


# =====================================================
# Inference Function (SAFE)
# =====================================================
def ask_question(question: str):
    question = str(question).strip()

    if not question:
        return "Please enter a question.", ""

    try:
        answer = rag_chain.invoke(question)

        docs = retriever.invoke(question)
        sources = sorted({d.metadata.get("source", "Unknown") for d in docs})

        sources_text = "\n".join(f"- {s}" for s in sources)

        return answer, sources_text

    except Exception as e:
        return f"Error during inference: {str(e)}", ""


# =====================================================
# Gradio UI (STABLE)
# =====================================================
with gr.Blocks(title="GenAI Document Intelligence (RAG)") as demo:

    gr.Markdown(
        """
        # ðŸ“„ GenAI Document Intelligence (RAG)
        Ask financial and business questions from Microsoft FY2022 documents  
        using an **offline Retrieval-Augmented Generation system**.
        """
    )

    question_input = gr.Textbox(
        label="Your Question",
        placeholder="e.g. How did cloud services perform in FY2022?",
        lines=2
    )

    ask_btn = gr.Button("Ask")

    answer_output = gr.Textbox(
        label="Answer",
        lines=6
    )

    sources_output = gr.Textbox(
        label="Sources",
        lines=4
    )

    ask_btn.click(
        fn=ask_question,
        inputs=question_input,
        outputs=[answer_output, sources_output]
    )

demo.launch()


Device set to use cpu


* Running on local URL:  http://127.0.0.1:7873
* To create a public link, set `share=True` in `launch()`.




Token indices sequence length is longer than the specified maximum sequence length for this model (740 > 512). Running this sequence through the model will result in indexing errors
