In [39]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
import chromadb
import uuid
from langchain.vectorstores import Chroma
from chromadb.utils.batch_utils import create_batches
from dotenv import load_dotenv
load_dotenv()
import os

In [51]:
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1"
key=os.environ.get('api_key')
model=HuggingFaceEndpoint(repo_id=repo_id,huggingfacehub_api_token=key,add_to_git_credential=True)

                    add_to_git_credential was transferred to model_kwargs.
                    Please make sure that add_to_git_credential is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\hp\.cache\huggingface\token
Login successful


In [32]:
embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [27]:
loader=PyPDFLoader(r"C:\Users\hp\Downloads\ilovepdf_merged_merged.pdf",extract_images=False)
data=loader.load()

In [28]:
textSplitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
textChunks=textSplitter.split_documents(data)
len(textChunks)

2841

In [43]:
def batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

from chromadb import Client

# Initialize the ChromaDB client
chromClient = Client()

max_batch_size = 166

batches = list(batch(textChunks, max_batch_size))

# Create a collection in the ChromaDB client
# collection = chromClient.create_collection(name="lungcancerHack")

# Add documents in batches
for batch_documents in batches:
    vectorStore = Chroma.from_documents(documents=batch_documents,
                                        collection_name="lungcancerHack",
                                        embedding=embeddings)



In [44]:
retriever=vectorStore.as_retriever()

In [45]:
query="What is Lungs Cancer?"
qa_chain=RetrievalQA.from_chain_type(
    llm=model,
    chain_type='stuff',
    retriever=retriever,
)
qa_chain.run(query)

  warn_deprecated(


'\nLung cancer is a disease caused by uncontrolled cell division in the lungs. This can result in the formation of a tumor, which can be seen on chest x-rays or CT scans as a nodule or a mass. Lung cancer usually starts in the airways or small air sacs of the lungs and can then spread to other organs. It is more common in people who have smoked tobacco products, but it can also occur in non-smokers.'

In [52]:
stage="final"
problem="cough and chest pain"
time="10 days"
query=f"Patient is at {stage} stage of lung cancer. Patient is experiencing {problem} for the past {time}."
query


'Patient is at final stage of lung cancer. Patient is experiencing cough and chest pain for the past 10 days.'

In [53]:
template = """
You are a lung cancer specialist. Your job is to provide a roadmap for the surgery and treatment of the patient. Junior doctors have already conducted imaging tests such as a chest CT scan and a PET scan, along with other lung cancer tests, and discussed the symptoms, medical history, and any other relevant information. The stage of lung cancer has already been determined and is provided below.

Your job is to generate a roadmap for how the doctor should proceed with the treatment of this particular patient, given the cancer stage and condition provided and also include potential drawbacks for each treatment. This should include recommendations for physical exercises the patient should do, things the patient needs to avoid, and a complete explanation of each step and potential drawbacks based on the context provided.

Context: {context}

Condition: {condition}

If you think the information provided is not relevant or the condition given to you by the junior doctor is not relevant, simply say, "Please provide correct information." If the patient is already in a normal condition, just simply say, "Don't worry, you are okay!" and don't need to generate a roadmap.
"""
prompt = ChatPromptTemplate.from_template(template)

In [54]:
from langchain.chains import HypotheticalDocumentEmbedder
hyde_embeddings=HypotheticalDocumentEmbedder.from_llm(model,
                                                      embeddings,
                                                      prompt_key="web_search")
def formatDocs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
   {"context": retriever|formatDocs , "condition": RunnablePassthrough()}
   | prompt
   | model
   | StrOutputParser()
)
rag_chain.invoke(query)

"\nRoadmap: \n\n1. Surgery: Given the final stage of lung cancer, surgery is not a viable option as it would not significantly improve the patient's condition. Moreover, the patient's current health status might not withstand the rigors of surgery.\n\n2. Radiation Therapy: This treatment can be used to shrink tumors and alleviate symptoms such as coughing and chest pain. However, it may cause side effects such as fatigue, skin reactions, and potential damage to nearby organs.\n\n3. Chemotherapy: This can be used to slow the growth of cancer cells and alleviate symptoms. However, it may cause side effects such as hair loss, nausea, and a weakened immune system.\n\n4. Immunotherapy: This is a newer treatment that uses the body's own immune system to fight cancer. It has fewer side effects than chemotherapy but may not be as effective for late-stage lung cancer.\n\n5. Palliative Care: This focuses on relieving symptoms and improving the patient's quality of life. It can be used in conjunc

-338058334