## 1. Install necessary libraries

In [None]:
# One-time setup: uncomment and run in a fresh environment (%pip installs into the kernel's environment).
# faiss-cpu is included because the FAISS vector store imported below needs it.
# %pip install langchain langchain-community pypdf sentence-transformers faiss-cpu ollama

In [None]:
# Only needed if sentence-transformers is still missing after the install line in the previous cell:
# %pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Downloading sentence_transformers-5.0.0-py3-none-any.whl (470 kB)
Installing collected packages: sentence_transformers
Successfully installed sentence_transformers-5.0.0


In [1]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

## --- 1. Data Preparation ---
Replace `book_path` in the cell below with the path to your technical book (a PDF file).

In [2]:

# Location of the PDF to index. Set the BOOK_PATH environment variable to point
# at your own book; when it is unset, the original default path is used.
book_path = os.environ.get(
    "BOOK_PATH",
    "C:/Users/amith/Documents/genai_proj1/unix programmers manual.pdf",
)
loader = PyPDFLoader(book_path)
documents = loader.load()

# Character-based splitting (length_function=len): a target chunk size of 2000
# characters with a 300-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=300,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_documents(documents)
if not chunks:
    # Stop here with a clear message instead of failing later, when the vector
    # store is built from an empty chunk list.
    raise ValueError(
        f"No text chunks were produced from {book_path!r}; "
        "check that the file contains extractable text."
    )
print(f"Split book into {len(chunks)} chunks.")

Split book into 336 chunks.



## --- 2. Create Embeddings and Vector Store ---
The embeddings are computed with a local sentence-transformers model.

In [None]:
# Embedding model for the book chunks; it is handed to FAISS.from_documents
# when the vector store is built.
embeddings = SentenceTransformerEmbeddings(
    model_name="all-mpnet-base-v2",
)

  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


### Create a FAISS vector store from the chunks

In [4]:
# Embed the chunks and build the FAISS index from them, with a progress
# message before and after the build.
print("Creating vector store (this might take a moment)...")
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)
print("Vector store created.")


Creating vector store (this might take a moment)...
Vector store created.


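* You can save the vector store to disk and load it later to avoid re-embedding the book on every run:
* `vectorstore.save_local("faiss_index")`
* `vectorstore = FAISS.load_local("faiss_index", embeddings)` (recent `langchain-community` releases also require `allow_dangerous_deserialization=True` here; only load index files you created yourself)
* Rebuild and re-save the index whenever you change the embedding model or the chunking settings; an index built with a different embedding model will not match the queries.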

## --- 3. Implement RAG ---
* Initialize the Ollama LLM (make sure the Ollama server is running and the model has been pulled).
* Example: `ollama pull gemma:2b` (the model name must match the one passed to `Ollama(model=...)` below).

In [5]:
# LLM served by Ollama. Replace the model name with your chosen Ollama model.
llm = Ollama(
    model="gemma:2b",
)


  llm = Ollama(model="gemma:2b") # Replace with your chosen Ollama model


In [6]:
# Build a retriever on top of the vector store.
# k=3: retrieve the top 3 relevant chunks for each question.
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=3),
)

In [8]:
# RAG prompt: the retrieved text fills {context}, the user's question fills
# {question}, and the model is told to admit when it does not know the answer.
rag_prompt_template = (
    "Use the following context to answer the question at the end.\n"
    "If you don't know the answer, state that you don't know, and do not make up an answer.\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer:"
)
RAG_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=rag_prompt_template,
)


In [9]:

# Assemble the retrieval-augmented QA chain.
# - chain_type="stuff" combines all retrieved documents into one prompt.
# - return_source_documents=True is optional; it lets you see which chunks
#   were used for an answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": RAG_PROMPT},
    return_source_documents=True,
)


## --- 4. Chatbot Interaction (Basic CLI) ---

In [None]:
# Basic question/answer loop over the RAG chain.
# Set SHOW_SOURCES to True to also list the retrieved chunks behind each answer.
SHOW_SOURCES = False

print("\nChatbot initialized! Ask questions about your technical book.")
print("Type 'exit' to quit.")

while True:
    try:
        # Strip surrounding whitespace so that " exit " still quits.
        query = input("\nYour question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        break

    if query.lower() == "exit":
        break
    if not query:
        # Nothing was typed; skip the retrieval + LLM call.
        continue

    result = qa_chain.invoke({"query": query})
    print("\nChatbot Answer:")
    print(result["result"])

    if SHOW_SOURCES:
        # Show the origin and the first 150 characters of every retrieved chunk.
        print("\nSource Documents:")
        for doc in result["source_documents"]:
            print(f"- {doc.metadata.get('source', 'Unknown source')}: {doc.page_content[:150]}...")


Chatbot initialized! Ask questions about your technical book.
Type 'exit' to quit.

Chatbot Answer:
The context does not provide any information about insurance, double down, or the random number generator, so I cannot answer this question from the context.

Chatbot Answer:
The context does not provide a list of important commands, so I cannot answer this question from the provided context.

Chatbot Answer:
The context does not specify what mkdir is, so I cannot answer this question from the context.

Chatbot Answer:
The context does not provide information about copy commands, so I cannot answer this question from the provided context.

Chatbot Answer:
Sure, here is the answer to the question:

The context does not define what a directory is, so I cannot answer this question from the context.

Chatbot Answer:
Sure, here's the answer to your question:

The context describes a file system format and its structure. It mentions that a file system consists of a super block, a free list of

Chatbot initialized! Ask questions about your technical book.
Type 'exit' to quit.

Chatbot Answer:
The context does not provide any information about what unix is, so I cannot answer this question from the context.

Chatbot Answer:
The context does not provide any information about important commands to copy files, so I cannot answer this question from the context.

Chatbot Answer:
Sure, here's the answer to the question:

The context does not specify what the ls command is, so I cannot answer this question from the provided context.

Chatbot Answer:
The context does not specify what cp is, so I cannot answer this question from the context.

Chatbot Answer:
A directory is a special type of file that contains other files and or directories. A directory is indicated by a bit in the file's i-node entry, and it can have up to 16 entries. The first two words of each entry contain the i-number and filename, respectively, followed by a null-padded string of up to 14 characters. The first two words of each entry are for the directory itself, and the second is for the parent directory.

Chatbot Answer:
The context does not mention anything about "hi", so I cannot answer this question from the context.

Chatbot Answer:
The context does not provide any information about more, so I cannot answer the question.

Chatbot Answer:
The context does not specify what mkdir is, so I cannot answer this question from the context.