In [6]:
# update code
from datasets import load_dataset
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

# Fetch the "all-processed" configuration of the medical Q&A dataset.
dataset = load_dataset("Malikeh1375/medical-question-answering-datasets", "all-processed")

# Only the 'train' split is used from here on.
data = dataset['train']

# One Document per row: the 'input' field becomes the searchable text and the
# 'output' field travels along as metadata under the 'answer' key.
documents = [
    Document(page_content=row['input'], metadata={'answer': row['output']})
    for row in data
]

# Splitter configured for chunks of size 600 with no overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=0)

# Flatten every document into its chunks. Each chunk is stored as a plain dict
# and carries the answer of the document it was cut from, so the question/answer
# pairing survives the split.
split_text = [
    {"page_content": piece.page_content, "answer": source_doc.metadata['answer']}
    for source_doc in documents
    for piece in text_splitter.split_documents([source_doc])
]

# Sanity check: container type and the first chunk.
print(type(split_text))
print(split_text[0])



<class 'list'>
{'page_content': 'Hey Just wondering.  I am a 39 year old female, pretty smallMy heart rate is around 97 to 106 at rest, and my BP is 140/90 and twice I get 175/118I did visit a doctor because I  didnt feel well past month or twoThen the doctor gave me a heart medicine to take the pulse down and BP  (its still in further examination.)But I wondering what it can be? Do I need the medicine really?  Is that bad ?', 'answer': "hello and thank you for using chatbot. i carefully read your question and i understand your concern. i will try to explain you something and give you my opinion. we talk about hypertension if we have mean value that exceeds 140 / 90 mmhg. a person might have high value during emotional and physicals trees so it's mandatory to judge on mean values. usaly hypertension does not give any symptoms but left untreated he slowly modifies the heart. according to heart rhythm, the normal rate is between 50-100 beat for minute. when it exceeds 100 we talk about s

In [8]:
# NOTE(review): this cell is a disabled index build, kept for reference only.
# Points to check before re-enabling it:
#   - `split_text` is built in the cell above as a list of plain dicts, while the
#     call below is `Chroma.from_documents`, which presumably expects Document
#     objects -- confirm against the Chroma docs.
#   - `persist_directory` is assigned below but never passed to the Chroma call.
#   - the keyword used below is `embedding_function`; verify that
#     `from_documents` accepts it (the parameter may be named `embedding`).
# from langchain_huggingface import HuggingFaceEmbeddings  
# from langchain_community.vectorstores import Chroma

# persist_directory = "D:\\DiagnoTechHub\\FYP\\Final Chatbot\\AllMini-L6"
# vectorDB = Chroma.from_documents(split_text,embedding_function=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))

# # Persist the database to disk
# vectorDB.persist()

In [7]:
# Loading the Vector Database
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Location of the Chroma store on the local disk.
persist_directory = "D:\\Uni Work\\FYP\\Integration\\chatbot\\AllMini-L6"

# Embedding model served by Ollama, used for the store's embedding function.
# NOTE(review): the store at `persist_directory` must have been built with a
# compatible embedding model -- confirm how it was created.
ollama_embeddings = OllamaEmbeddings(model="all-minilm:l6-v2")

vectorDB = Chroma(
    persist_directory=persist_directory,
    embedding_function=ollama_embeddings,
)


In [8]:
from langchain_core.prompts import ChatPromptTemplate

# Template text for the answering step. It has two placeholders:
#   {context} - filled in by the document chain that consumes this prompt
#   {input}   - the user's question
QA_PROMPT_TEMPLATE = """
        You are a helpful assistant. You can answer questions about query.
        {context}

        Question:{input}
        """

prompt = ChatPromptTemplate.from_template(template=QA_PROMPT_TEMPLATE)

In [9]:
from langchain_community.llms import Ollama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Expose the vector store as a retriever (default retriever settings).
retiver = vectorDB.as_retriever()

# Ollama-served model that generates the final answer.
llm = Ollama(model="gemma:2b")

# Document chain: combines the documents it is given with `prompt` and sends
# the result to the LLM.
doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# End-to-end chain: look up documents for the query with the retriever, then
# hand them to `doc_chain` to produce the answer.
retrival_chain = create_retrieval_chain(
    retriever=retiver,
    combine_docs_chain=doc_chain,
)

# Run a sample query and display only the generated answer.
responce = retrival_chain.invoke({"input": "define is atelectasis?"})
responce['answer']
# (evaluate `responce` instead to inspect the full result)

"Sure, here's a definition of atelectasis:\n\n**Atelectasis** is a condition in which the lungs fail to expand properly. This can be caused by a number of underlying medical conditions, including asthma, bronchitis, pneumonia, and heart failure. Symptoms of atelectasis can include shortness of breath, coughing, chest pain, and rapid breathing."