In [36]:
# ✅ Install necessary libraries
!pip install langchain langchain_huggingface faiss-cpu sentence-transformers huggingface_hub transformers dotenv PyPDF2 -q

# ✅ Import required libraries
import os
import PyPDF2
from getpass import getpass
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from google.colab import files

# ✅ Step 1: Set Hugging Face API Token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Enter your Hugging Face API Token: ")

# ✅ Step 2: Set Hugging Face Model
HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"

# ✅ Step 3: Load LLM
def load_llm(huggingface_repo_id):
    llm = HuggingFaceEndpoint(
        repo_id=huggingface_repo_id,
        task="text-generation",
        temperature=0.5,
        model_kwargs={"max_length": 512},
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
    )
    return llm

# ✅ Step 4: Upload PDF File
uploaded = files.upload()
pdf_path = list(uploaded.keys())[0]

# ✅ Step 5: Extract Text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text() + "\n" if page.extract_text() else ""
    return text.strip()

pdf_text = extract_text_from_pdf(pdf_path)
print(f"✅ Extracted {len(pdf_text)} characters from PDF")

# ✅ Step 6: Split Text into Chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
documents = text_splitter.split_text(pdf_text)
docs = [Document(page_content=t) for t in documents]

# ✅ Step 7: Create FAISS Vector Store
DB_FAISS_PATH = "/content/vectorstore/db_faiss"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(docs, embedding_model)
db.save_local(DB_FAISS_PATH)

print("✅ FAISS Vector Store Created & Saved!")

# ✅ Step 8: Load FAISS for Retrieval
db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
print("✅ FAISS Vector Store Loaded!")

# ✅ Step 9: Add Memory for Chatbot (Remembers Previous Queries)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# ✅ Step 10: Create Conversational QA Chain
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=load_llm(HUGGINGFACE_REPO_ID),
    retriever=db.as_retriever(search_kwargs={'k': 3}),
    memory=memory,
)



Enter your Hugging Face API Token: ··········


Saving medical_book.pdf to medical_book (1).pdf
✅ Extracted 3137243 characters from PDF
✅ FAISS Vector Store Created & Saved!
✅ FAISS Vector Store Loaded!


  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [37]:

# ✅ Step 11: Chatbot Loop (Remembers Conversation History)
print("\n🤖 Chatbot is ready! Type 'exit' to stop.")
while True:
    user_query = input("Enter your query: ")
    if user_query.lower() == "exit":
        print("👋 Exiting chatbot. See you later!")
        break
    response = qa_chain.invoke({'question': user_query})
    print("\n🔍 **RESULT:**", response["answer"])


🤖 Chatbot is ready! Type 'exit' to stop.
Enter your query: what are meningioma tumours?





🔍 **RESULT:**  Meningioma tumours are not mentioned in the provided context. They are tumours that originate from the meninges, the membranes that cover the brain and spinal cord.
Enter your query: treatment options for brain tumours





🔍 **RESULT:**  Treatment for brain tumours usually involves surgical removal of the tumour and chemotherapy. Radiation may also be used, particularly for brain tumours. Complementary treatments can help decrease stress and reduce the side effects of cancer treatment, and may include activities such as yoga, massage, music therapy, meditation, prayer, or mild physical exercise.
Enter your query: what is radiation therapy





🔍 **RESULT:**  Radiation therapy, also known as radiotherapy, is a treatment method used for solid tumors, including brain tumors. It involves the administration of ionizing radiation to a specific tumor location. This generates reactive oxygen molecules, causing the destruction of DNA in local cells.
Enter your query: in more detail





🔍 **RESULT:**  Radiation therapy is a localized treatment used in the management of cancer. It involves the administration of ionizing radiation to a solid tumor location. This generates reactive oxygen molecules, causing the destruction of DNA in local cells. The decision to use radiation therapy depends on the stage of the disease. Radiation therapy may be used before surgery (preoperatively) and/or after surgery (postoperatively). Radiation given from a machine that is outside the body is called external radiation therapy. Sometimes applicators containing radioactive compounds are placed inside the vagina or uterus. This is called internal radiation therapy. Radiotherapy requires extensive treatment planning and imaging. Care must be taken to localize the cancer field while attempting to spare destruction of normal tissue. This requires image monitoring and exact positioning during radiation treatment sessions. Side effects and patient tolerance issues are common with chemotherapy,




🔍 **RESULT:**  Heterochromia is not explicitly mentioned in the provided context. It refers to the condition where an individual has two differently colored irises in their eyes.


KeyboardInterrupt: Interrupted by user

In [38]:
import pickle
import os

MODEL_DIR = "saved_model"
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

# ✅ Save FAISS Vector Store
DB_FAISS_PATH = os.path.join(MODEL_DIR, "db_faiss")
db.save_local(DB_FAISS_PATH)
print("✅ FAISS Vector Store Saved!")

# ✅ Save Chatbot Memory (Optional)
MEMORY_PATH = os.path.join(MODEL_DIR, "chat_memory.pkl")
with open(MEMORY_PATH, "wb") as f:
    pickle.dump(memory, f)
print("✅ Chatbot Memory Saved!")

# ✅ Save LLM Configuration
LLM_CONFIG_PATH = os.path.join(MODEL_DIR, "llm_config.json")
llm_config = {"huggingface_repo_id": HUGGINGFACE_REPO_ID}
with open(LLM_CONFIG_PATH, "w") as f:
    json.dump(llm_config, f)
print("✅ LLM Configuration Saved!")


✅ FAISS Vector Store Saved!
✅ Chatbot Memory Saved!
✅ LLM Configuration Saved!


In [39]:
import os

MODEL_DIR = "saved_model"
files = os.listdir(MODEL_DIR)
print("📂 Saved Files:", files)


📂 Saved Files: ['chat_memory.pkl', 'db_faiss', 'llm_config.json']
