In [20]:
import requests
from bs4 import BeautifulSoup
import os
import chromadb
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.document_transformers import (
    EmbeddingsRedundantFilter,
    EmbeddingsClusteringFilter,
)
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [21]:
import requests
from bs4 import BeautifulSoup

# List of URLs to extract content from
urls = [
    "https://www.nhsinform.scot/illnesses-and-conditions/nutritional/dehydration/",
    "https://www.nhsinform.scot/illnesses-and-conditions/stomach-liver-and-gastrointestinal-tract/hepatitis-a/",
    "https://www.nhsinform.scot/illnesses-and-conditions/infections-and-poisoning/scarlet-fever/",
    "https://www.nhsinform.scot/illnesses-and-conditions/stomach-liver-and-gastrointestinal-tract/stomach-ulcer/"
]

# Function to extract text content from a single URL
def extract_text(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for HTTP errors

        # Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract all text and concatenate it into a single string
        page_text = soup.get_text(separator=' ', strip=True)
        return page_text

    except requests.exceptions.RequestException as e:
        print(f"Could not retrieve {url}: {e}")
        return ""

# Concatenate text content from each URL
concatenated_text = ""
for url in urls:
    concatenated_text += extract_text(url) + "\n\n"

# Save concatenated text to a file
with open("concatenated_text.txt", "w", encoding="utf-8") as file:
    file.write(concatenated_text)

print("Text content has been successfully saved to 'concatenated_text.txt'.")

Text content has been successfully saved to 'concatenated_text.txt'.


In [22]:
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
print("Embedding Model Loaded..........")

Embedding Model Loaded..........


In [23]:
from langchain_community.document_loaders import TextLoader

class CustomTextLoader(TextLoader):
    def __init__(self, file_path: str, encoding: str = 'utf-8'):
        super().__init__(file_path)
        self.encoding = encoding

    def lazy_load(self):
        try:
            with open(self.file_path, encoding=self.encoding) as f:
                text = f.read()
            return [text]
        except UnicodeDecodeError as e:
            raise RuntimeError(f"Error loading {self.file_path}") from e
            
def process_text_file(text_filename, encoding='utf-8'):
    # Load text from the text file
    loader = TextLoader(text_filename, encoding)
    text_content = loader.load()

    # Split text using RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    text_chunks = text_splitter.split_documents(text_content)

    return text_chunks

# Example usage
text_filename = "concatenated_text.txt"
documents = process_text_file(text_filename)

In [24]:
medical_store = Chroma.from_documents(documents, hf, collection_metadata={"hnsw:space": "cosine"}, persist_directory="medical_chroma_cosine")

In [25]:
import shutil

# Source directory to be zipped
source_dir = r'medical_chroma_cosine'
# Destination zip file path
zip_file_path = r'medical_chroma_cosine.zip'

# Create a zip file
shutil.make_archive(zip_file_path, 'zip', source_dir)

print(f"Directory {source_dir} zipped successfully into {zip_file_path}.zip")

Directory medical_chroma_cosine zipped successfully into medical_chroma_cosine.zip.zip


In [26]:
load_guidelines_store = Chroma(persist_directory="medical_chroma_cosine", embedding_function=hf)

In [27]:
retriever_guidelines = load_guidelines_store.as_retriever(search_type = "similarity", search_kwargs = {"k":3})

In [28]:
lotr = MergerRetriever(retrievers=[retriever_guidelines])

In [29]:
query = "As a doctor, how to treat stomach ulcers?"

In [30]:
docs = lotr.get_relevant_documents(query)
docs

[Document(metadata={'source': 'concatenated_text.txt'}, page_content='as ibuprofen or aspirin – particularly if they’re taken for a long time or at high doses There’s little evidence that stress or certain foods causes stomach ulcers. More about the causes of stomach ulcers Treating stomach ulcers You’ll be treated using antibiotics if your ulcer was caused by a H. pylori infection. This kills the bacteria and should prevent the ulcer coming back. You’ll be treated using a proton pump inhibitor (PPI) if your ulcer was caused by NSAIDs. Your doctor will prescribe these and discuss whether you should keep using NSAIDs. Alternative medication to NSAIDs, such as paracetamol , may be recommended. Most stomach ulcers take a few months to heal after treatment. However, stomach ulcers can come back after treatment, although this is less likely to happen if the underlying cause is addressed. More about treating stomach ulcers Complications Complications are rare but can be very serious and pote

In [31]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain



You should consider upgrading via the 'C:\Users\Admin\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [32]:
! pip install langchain_groq



You should consider upgrading via the 'C:\Users\Admin\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [33]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [None]:
chat_model = ChatGroq(temperature=0,
              model_name="llama3-8b-8192",
              api_key=api_key)

In [35]:
chain = prompt | chat_model

In [36]:
chain.invoke({"context":docs,"question":query}).content

'According to the provided context, as a doctor, you would treat stomach ulcers depending on what caused them. Here are the treatment options mentioned:\n\n* If the ulcer was caused by a Helicobacter pylori (H. pylori) infection, you would prescribe:\n\t+ A course of antibiotics (amoxicillin, clarithromycin, or metronidazole)\n\t+ A medication called a proton pump inhibitor (PPI)\n* If the ulcer was caused by non-steroidal anti-inflammatory drugs (NSAIDs), you would prescribe:\n\t+ A proton pump inhibitor (PPI)\n\t+ Alternative medication to NSAIDs, such as paracetamol\n* In some cases, you may also recommend lifestyle changes, such as quitting smoking, as smoking can increase the risk of developing stomach ulcers and make treatment less effective.'

In [37]:
print("As a doctor, you can follow this steps for the patient's Hepatitis B treatment:")
print("Prescribing medications to help with itchiness, nausea, or vomiting, if needed. Providing advice on painkillers such as")
print("paracetamol or ibuprofen for any aches and pains. Recommending a cool, airy environment, loose clothing, and smaller, lighter")
print("meals to help reduce feeling sick and vomiting. Advising on the importance of getting plenty of rest.")


As a doctor, you can follow this steps for the patient's Hepatitis B treatment:
Prescribing medications to help with itchiness, nausea, or vomiting, if needed. Providing advice on painkillers such as
paracetamol or ibuprofen for any aches and pains. Recommending a cool, airy environment, loose clothing, and smaller, lighter
meals to help reduce feeling sick and vomiting. Advising on the importance of getting plenty of rest.
