In [1]:
from haystack.telemetry import tutorial_running
tutorial_running(27)

In [2]:
import os
from getpass import getpass

if "WEAVIATE_API_KEY" not in os.environ:
  os.environ["WEAVIATE_API_KEY"] = getpass("Enter your Weaviate API Key")

if "MISTRAL_API_KEY" not in os.environ:
  os.environ["MISTRAL_API_KEY"] = getpass("Enter your Mistral API Key")

In [3]:
## init the DocumentStore

from haystack_integrations.document_stores.weaviate import WeaviateDocumentStore, AuthApiKey
from haystack import Document
import os

auth_client_secret = AuthApiKey()

WEAVIATE_URL = "https://6tmywlqskmo4mvbpfbrg.c0.europe-west3.gcp.weaviate.cloud" # need to check this
document_store = WeaviateDocumentStore(url=WEAVIATE_URL,
                                       auth_client_secret=auth_client_secret)

In [4]:
## Fetch data from datasets import load_dataset

import os
from haystack import Document

def load_local_dataset(directory_path):
    documents = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".txt") or file.endswith(".md"):
                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                    content = f.read()
                documents.append(Document(content=content, meta={"filename": file}))
    return documents

# Load your dataset
local_dataset_path = "./processed_data/patient_1"
documents = load_local_dataset(local_dataset_path)


In [5]:
documents.__len__()

4

### Mistral Doc Embedder 

In [6]:
from haystack import Document
from haystack_integrations.components.embedders.mistral.document_embedder import MistralDocumentEmbedder

Mistral_doc_embedder = MistralDocumentEmbedder(model="mistral-embed")


In [None]:
# Write Documents to the DocumentStore
docs_with_embeddings = Mistral_doc_embedder.run(documents)
document_store.write_documents(docs_with_embeddings["documents"])

## RAG 

### Query Text embedder from Mistral

In [7]:
from haystack_integrations.components.embedders.mistral.text_embedder import MistralTextEmbedder

embedder = MistralTextEmbedder(model="mistral-embed")

result = embedder.run(text="How can I ise the Mistral embedding models with Haystack?")

print(result['embedding'])


[-0.0015687942504882812, 0.052154541015625, 0.037109375, 0.0019359588623046875, 0.041473388671875, -0.014984130859375, -0.01279449462890625, -0.00823211669921875, -0.0180816650390625, 0.016357421875, -0.0384521484375, 0.0594482421875, -0.0162811279296875, 0.007167816162109375, -0.0626220703125, 0.08380126953125, 0.0230255126953125, 0.0076904296875, 0.0280914306640625, 0.0011196136474609375, -0.0287322998046875, 0.01346588134765625, -0.00827789306640625, 0.0252838134765625, -0.0233917236328125, -0.01136016845703125, 0.009918212890625, -0.031982421875, -0.0242462158203125, -0.0221099853515625, 0.032623291015625, -0.020843505859375, 0.0195159912109375, -0.0260009765625, 0.0197601318359375, 0.0005555152893066406, -0.016754150390625, -0.035186767578125, 0.022674560546875, -0.00901031494140625, -0.01776123046875, 0.022979736328125, -0.058837890625, -0.00452423095703125, -0.04486083984375, -0.0171356201171875, -0.0386962890625, -0.0218353271484375, 0.00855255126953125, -0.024566650390625, 0.0

### init the Retrival from weaviate

In [2]:

from haystack_integrations.components.retrievers.weaviate import WeaviateEmbeddingRetriever

# Initialize the Weaviate retriever
retriever = WeaviateEmbeddingRetriever(document_store=document_store)


NameError: name 'document_store' is not defined

### Define Template for Patient Medical Record Prompt 

In [9]:
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage

technical_template = [
    ChatMessage.from_user(
        """
Please provide a detailed and technical answer to the following question based on the medical information available:

Question: {{ question }}

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Technical Details:
        """
    )
]

technical_prompt_builder = ChatPromptBuilder(template=technical_template)


simplification_template = [
    ChatMessage.from_user(
        """
Now, let's translate the medical information into simpler terms for better understanding. Here's the detailed answer to your query:

Technical Answer: {{ technical_response }}

**Here’s what you need to know**:  
[Provide a simplified, easy-to-understand explanation of the answer, as if talking directly to the patient not mention his name, you know him.]

**What you can do next**:  
- **Medications**: [Provide clear, actionable advice about medications, if this only exist in the medical record e.g., "Take your insulin as prescribed, and make sure to monitor your blood sugar levels regularly."]  
- **Activities**: [Suggest practical activities, if this only exist in the medical record e.g., "Try to go for a 20-minute walk every day—it’s great for your health!"]  
- **Dietary Tips**: [Offer simple dietary advice, if this only exist in the medical record e.g., "Include more leafy greens and whole grains in your meals, and try to avoid sugary snacks."]  

**Remember**: You’re doing great, and I’m here to support you every step of the way. If you have any more questions or need further clarification, just ask! I’m always here to help. 😊

What else would you like to know?


        """
    )
]

simplification_prompt_builder = ChatPromptBuilder(template=simplification_template)


# inti the Chat Generator  Mistral 

In [10]:

from haystack_integrations.components.generators.mistral import MistralChatGenerator
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

# mistral_chat_generator = MistralChatGenerator(streaming_callback=print_streaming_chunk)


technical_llm = MistralChatGenerator(streaming_callback=print_streaming_chunk)  # LLM for technical response
simplification_llm = MistralChatGenerator(streaming_callback=print_streaming_chunk)  # LLM for simplified respon

## RAG pipeline 

In [11]:
from haystack import Pipeline

basic_rag_pipeline = Pipeline()

# Add components to your pipeline
basic_rag_pipeline.add_component("text_embedder", embedder)
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("technical_prompt_builder", technical_prompt_builder)
basic_rag_pipeline.add_component("technical_llm", technical_llm)
basic_rag_pipeline.add_component("simplification_prompt_builder", simplification_prompt_builder)
basic_rag_pipeline.add_component("simplification_llm", simplification_llm)



In [12]:
# Connect the components
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever.documents", "technical_prompt_builder.documents")
basic_rag_pipeline.connect("technical_prompt_builder.prompt", "technical_llm.messages")
basic_rag_pipeline.connect("technical_llm.replies", "simplification_prompt_builder.technical_response")
basic_rag_pipeline.connect("simplification_prompt_builder.prompt", "simplification_llm.messages")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7f625ad07a30>
🚅 Components
  - text_embedder: MistralTextEmbedder
  - retriever: WeaviateEmbeddingRetriever
  - technical_prompt_builder: ChatPromptBuilder
  - technical_llm: MistralChatGenerator
  - simplification_prompt_builder: ChatPromptBuilder
  - simplification_llm: MistralChatGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> technical_prompt_builder.documents (List[Document])
  - technical_prompt_builder.prompt -> technical_llm.messages (List[ChatMessage])
  - technical_llm.replies -> simplification_prompt_builder.technical_response (List[ChatMessage])
  - simplification_prompt_builder.prompt -> simplification_llm.messages (List[ChatMessage])

In [13]:
# Define a function to run the pipeline
def ask_question(question):
    results = basic_rag_pipeline.run(
        {
            "text_embedder": {"text": question},  # Embed the question
            "technical_prompt_builder": {"question": question},  # Build the technical prompt
        }
    )
    return results["simplification_llm"]["replies"][0]  # Return the simplified response

# Now, connect the components to each other

basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever", "technical_prompt_builder")
basic_rag_pipeline.connect("technical_prompt_builder.prompt", "llm.messages")

basic_rag_pipeline.connect("llm.replies", "simplification_prompt_builder.technical_response")
basic_rag_pipeline.connect("simplification_prompt_builder.prompt", "llm.messages")

basic_rag_pipeline.show()

## Validation 


In [None]:
def process_question(question, documents, technical_pipeline, simplification_pipeline):
    # Generate technical response
    technical_result = technical_pipeline.run({"question": question, "documents": documents})
    technical_response = technical_result["llm"]["replies"][0].text

    # Simplify the technical response
    simplified_result = simplification_pipeline.run({"technical_response": technical_response})
    patient_friendly_response = simplified_result["llm"]["replies"][0].text

    return patient_friendly_response


In [14]:
import json

def ask_rag_system(question, basic_rag_pipeline):
    """
    Integrates with the RAG pipeline to ask a question and return the response.

    Args:
    question (str): The question to ask the RAG system.
    basic_rag_pipeline: The instantiated RAG pipeline object.

    Returns:
    str: The text response from the RAG system.
    """
    # Adjust this according to your pipeline's specific configuration and method calls.
    response = basic_rag_pipeline.run({
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question}
    })
    return response["llm"]["replies"][0].text

def test_rag_system(test_data, basic_rag_pipeline):
    results = []
    for test in test_data:
        response = ask_rag_system(test['question'], basic_rag_pipeline)
        
        results.append({
            "question": test['question'],
            "expected": test['expected_answer'],
            "response": response,
    
        })
        
        # Improved output
        print("--------------------------------------------------")
        print(f"Question: {test['question']}")
        print(f"Expected Answer: {test['expected_answer']}")
        print(f"RAG's Response: {response}")
        print("--------------------------------------------------\n")

    return results




In [15]:

# Load your JSON test data (assuming it's saved as `test_data.json`)
with open('./processed_data/patient_1/Q_A.json', 'r') as file:
    test_data = json.load(file)


In [22]:
test_data[0]

{'question': 'What were the main symptoms I had during my latest visit?',
 'expected_answer': 'You presented with worsening shortness of breath, intermittent chest tightness, and swelling in your lower extremities.'}

In [23]:

answer = ask_question(test_data[0]["question"])
print(f"Simplified Answer: {answer}")

Based on the medical information provided in your patient medical record, the main symptoms you had during your latest visit were worsening shortness of breath, intermittent chest tightness, and swelling in your lower extremities. These symptoms are consistent with heart failure, as suggested by the echocardiogram findings of mild left ventricular hypertrophy, ejection fraction ~55%, and some signs of volume overload. Additionally, the chest X-ray findings of mild cardiomegaly and prominent pulmonary vasculature are also indicative of heart failure.

Your poor glycemic control, as indicated by your Hemoglobin A1c of 9.2%, and recent worsening of diabetes-related symptoms such as polydipsia and polyuria, are also important to consider in your overall clinical presentation.

Furthermore, the swelling in your lower extremities, also known as peripheral edema, is a common symptom of heart failure and is likely related to fluid retention due to poor cardiac function. The edema is also consi

In [None]:

test_results = test_rag_system([test_data[0]], basic_rag_pipeline)

In [None]:

# Assuming `basic_rag_pipeline` is already instantiated and configured properly
# Here you would need to replace `basic_rag_pipeline` with your actual RAG pipeline object
# For demonstration, I'm assuming this object is available here as `basic_rag_pipeline`

# Run the test
test_results = test_rag_system(test_data, basic_rag_pipeline)
for result in test_results:
    print(result)