In [1]:
from langchain_community.document_loaders import PyPDFLoader

In [2]:
loader = PyPDFLoader('ArtOfWar.pdf')

# Load the document
data = loader.load()

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
chunk_size = 350
chunk_overlap = 75
splitter = RecursiveCharacterTextSplitter(
    separators = ["\n\n", "Chapter","."],
    chunk_size = chunk_size,
    chunk_overlap = chunk_overlap
)
# Merge all text from documents into a single string
merged_text = " ".join(doc.page_content for doc in data)

# Now split the combined text
docs = splitter.split_text(merged_text)


In [4]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_core.documents import Document
import os
import shutil

# Define persist directory
persist_directory = "./chroma_db"

# Remove and reinitialize ChromaDB storage to avoid corruption
if os.path.exists(persist_directory):
    shutil.rmtree(persist_directory)
os.makedirs(persist_directory, exist_ok=True)

# Ensure docs is a list of Document objects
if isinstance(docs[0], str):
    docs = [Document(page_content=text) for text in docs]  # Convert strings to Document objects

# Check for valid documents
if not docs or not isinstance(docs[0], Document):
    raise ValueError("docs must be a non-empty list of Document objects.")

# Initialize Chroma vector store
vectorstore = Chroma.from_documents(
    docs,
    embedding=HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    persist_directory=persist_directory  # Ensure a dedicated, writable directory
)

print("ChromaDB initialized successfully! ✅")


  embedding=HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),


ChromaDB initialized successfully! ✅


In [6]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
query = "What does Sun Tzu say about deception?"	
top_query = retriever.get_relevant_documents(query)
for queries in top_query[:3]:
    print(queries.page_content)
    print("\n")

.
20. Hold out baits to entice the enemy. Feign disorder, and crush him.
[All commentators, except Chang Y u, say, "When he is in disorder, crush
him." It is more natural to suppose that Sun Tzu is still illustrating the uses
of deception in war.]
21. If he is secure at all points, be prepared for him. If he is in superior
strength, evade him.
22


. 72 Can we then recklessly arraign Sun Tzu for dis-
regarding truth and honesty?
66 See XIII. ss. 11, note.
67 This is a rather obscure allusion to the TSO CHUAN, where Tzu-ch‘an says: "If you have a piece of
beautiful brocade, you will not employ a mere learner to make it up."
68 Cf. TAO TE CHING, ch. 31


. The style of this fragment is not noticeable different from that of
Sun Tzu himself, but no commentator raises a doubt as to its genuineness.]
23. The Book of Army Management says:
[It is perhaps signiﬁcant that none of the earlier commentators give us any
information about this work




  top_query = retriever.get_relevant_documents(query)


In [22]:
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
# Define the LLM
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
llm = HuggingFaceEndpoint(
    endpoint_url=MODEL_NAME,  # Replace with your HF API URL
    huggingfacehub_api_token="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",  # Load API Key from env
    temperature=0.7,  # Adjust response creativity
    max_new_tokens=512,  # Adjust response length

)




In [23]:
custom_prompt = PromptTemplate(
    template="""Use the retrieved context to provide a well-researched and thorough response.
If the answer is unknown, say you don't know—do not make up an answer.

Your response should:
- Explain Sun Tzu’s views on deception in detail.
- Provide at least one historical example of deception in war.
- Include a direct quote from *The Art of War* if available.

### Context:
{context}

### Question:
{question}

### Detailed Answer:
""",
    input_variables=["context", "question"],
)

# Create RetrievalQA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": custom_prompt}
)