In [27]:
import os
import base64
import getpass
import json
from ibm_watsonx_ai import APIClient, Credentials
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.foundation_models.utils import Toolkit

def get_encoded_credentials():
    """Prompt for an API key and return it, base64-encoded, next to the service URL.

    Returns:
        dict: ``{"url": <endpoint URL>, "apikey": <base64 text of the typed key>}``.
    """
    typed_key = getpass.getpass("Please enter your API key: ")
    # base64 is an encoding, not encryption: it only keeps the raw key out of plain sight.
    encoded_key = base64.b64encode(typed_key.encode("ascii")).decode("ascii")
    return {"url": "https://eu-gb.ml.cloud.ibm.com", "apikey": encoded_key}
# Identifiers of the deployment space, project and vector indexes used by this notebook.
project_id = "0989d54e-bebc-4a33-b6bd-c3331cdef4d9"
space_id = "f68919bd-af1c-49cb-97e4-49c68748b88b"
vector_index_id = "8ef82ae7-0818-4821-8d0f-f6a7ca3234d8"
background_vector_index_id = "b3c5ee54-0637-487c-b9d7-e805713a1724"

# Settings bundle passed to every helper that builds an API client.
# Evaluating it prompts for the API key (see get_encoded_credentials).
ai_params = dict(
    encoded_credentials=get_encoded_credentials(),
    space_id=space_id,
    project_id=project_id,
)


In [28]:
# Load the question bank from the JSON file in the working directory.
with open("prompt_mapping_with_class.json", mode="r", encoding="utf-8") as f:
    question_bank = json.loads(f.read())

def find_question_by_main(main_content, question_bank):
    """Look up the question-bank entry that best matches ``main_content``.

    Matching is attempted in four passes, returning the first hit:
      1. exact equality with the entry's ``"Prompt_Main"``;
      2. equality between ``str(entry["id"])`` and ``str(main_content)``;
      3. ``main_content`` occurring as a substring of ``"Prompt_Main"``;
      4. largest overlap of lower-cased, whitespace-separated words
         (the first entry with the highest overlap wins).

    Args:
        main_content: Question text or id to look for. Non-string values are
            compared through their ``str()`` form.
        question_bank: Iterable of dict entries.

    Returns:
        The matching entry, or ``None`` when nothing matches or when
        ``main_content`` is ``None`` / blank.
    """
    if main_content is None:
        return None
    needle = main_content if isinstance(main_content, str) else str(main_content)
    # A blank needle would otherwise "match" the first entry without an id
    # (str("") == "") and is a substring of every prompt.
    if not needle.strip():
        return None

    # 1) Exact text match.
    for q in question_bank:
        if q.get("Prompt_Main") == main_content:
            return q

    # 2) Match by id.
    for q in question_bank:
        if str(q.get("id", "")) == needle:
            return q

    # 3) Substring match; entries whose prompt is not text are skipped.
    for q in question_bank:
        prompt_text = q.get("Prompt_Main")
        if isinstance(prompt_text, str) and needle in prompt_text:
            return q

    # 4) Word overlap.
    content_words = set(needle.lower().split())
    best_q = None
    max_overlap = 0
    for q in question_bank:
        prompt_text = q.get("Prompt_Main")
        if not isinstance(prompt_text, str):
            continue
        overlap = len(content_words & set(prompt_text.lower().split()))
        if overlap > max_overlap:
            max_overlap = overlap
            best_q = q
    return best_q

In [29]:
#  RAG vector search

def remote_vector_search(query, api_client, vector_index_id, space_id):
    """Run the "RAGQuery" toolkit tool for ``query`` and return the retrieved items.

    Args:
        query: Text passed to the tool as ``input``.
        api_client: Client used to build the ``Toolkit``.
        vector_index_id: Sent to the tool as ``vectorIndexId``.
        space_id: Sent to the tool as ``spaceId``.

    Returns:
        The tool result's ``"documents"`` value when it is non-empty; otherwise
        the ``"output"`` text split on blank lines, each piece stripped and
        empty pieces dropped.
    """
    rag_tool = Toolkit(api_client=api_client).get_tool("RAGQuery")
    response = rag_tool.run(
        input=query,
        config={"vectorIndexId": vector_index_id, "spaceId": space_id},
    )

    documents = response.get("documents", [])
    if documents:
        return documents

    # NOTE(review): every blank-line-separated fragment of the output is returned
    # unfiltered, so any non-question fragments in the output end up in the list too.
    stripped_pieces = (piece.strip() for piece in response.get("output", "").split('\n\n'))
    return [piece for piece in stripped_pieces if piece]


In [30]:
def retrieve_background_knowledge(query, api_client, background_vector_index_id, space_id):
    """
    Query the "RAGQuery" toolkit tool against the background index and return
    the retrieved snippets as one newline-joined string.

    The tool result's "documents" value is used when non-empty; otherwise the
    "output" text is split on blank lines (pieces stripped, empty ones dropped).
    """
    search_config = {
        "vectorIndexId": background_vector_index_id,
        "spaceId": space_id,
    }
    rag_tool = Toolkit(api_client=api_client).get_tool("RAGQuery")
    response = rag_tool.run(input=query, config=search_config)

    snippets = response.get("documents", [])
    if not snippets:
        raw_output = response.get("output", "")
        snippets = list(filter(None, map(str.strip, raw_output.split('\n\n'))))
    return "\n".join(snippets)


In [31]:
import json
import os
import re

def _parse_keyword_reply(raw, current_query, max_keywords=8):
    """Turn the model's comma-separated reply into a clean, ordered keyword list."""
    complaint = current_query.strip().lower()
    seen = set()
    keywords = []
    # Treat line breaks like commas so a one-keyword-per-line reply is not lost.
    for piece in raw.replace("\n", ",").split(","):
        if len(keywords) >= max_keywords:
            break
        word = piece.strip().lower()
        # Keep single alphabetic words only; drop the complaint itself and repeats.
        if not word.isalpha() or word == complaint or word in seen:
            continue
        seen.add(word)
        keywords.append(word)
    return keywords


def expand_keywords(current_query, qna_history, prev_keywords, ai_params,
                    history_save_path="keywords_history.json", max_keywords=8):
    """
    Ask the model for an ordered list of single-word keywords for this turn.

    Reference text is first fetched with the "RAGQuery" tool using
    ``current_query``; the model is then prompted with that text, the Q&A
    history and the previous keyword pool.

    Args:
        current_query: The patient's latest statement (complaint or answer).
        qna_history: Q&A turns so far; JSON-serialised into the prompt.
        prev_keywords: Current keyword pool (iterable of strings).
        ai_params: Dict with "encoded_credentials" (url + base64 apikey) and "space_id".
        history_save_path: JSON file to which this turn's keyword list is appended.
        max_keywords: Upper bound on returned keywords (also stated in the prompt).

    Returns:
        list[str]: lower-cased, de-duplicated, purely alphabetic keywords in the
        order the model produced them, at most ``max_keywords`` long.

    Side effects:
        Prints the keyword list and appends it to ``history_save_path``. The
        file holds a JSON array with one keyword list per call; an unreadable
        or non-list file is restarted.
        NOTE: smart_rag_multi_turn rewrites "keywords_history.json" when a
        session ends, replacing what was accumulated here under the default path.
    """
    credentials = ai_params["encoded_credentials"]
    space_id = ai_params["space_id"]
    model_id = "meta-llama/llama-4-maverick-17b-128e-instruct-fp8"
    api_client = APIClient({
        "url": credentials["url"],
        "apikey": base64.b64decode(credentials["apikey"]).decode("ascii")
    })
    model = ModelInference(
        model_id=model_id,
        api_client=api_client,
        params={"max_new_tokens": 200, "temperature": 0.1},
        space_id=space_id
    )

    # Reference material for the prompt.
    knowledge_vector_index_id = "b3c5ee54-0637-487c-b9d7-e805713a1724"
    document_search_tool = Toolkit(api_client=api_client).get_tool("RAGQuery")
    config = {
        "vectorIndexId": knowledge_vector_index_id,
        "spaceId": space_id
    }
    pnf_results = document_search_tool.run(
        input=current_query,
        config=config
    )
    pnf_text = pnf_results.get("output", "")

    # Prompt: force the model to return ordered, single-word keywords as a comma-separated list.
    prompt_expand = f"""
You are an ALS/MND clinical assistant. Below is reference material about ALS/MND patient needs and life quality factors:
---
{pnf_text}
---
Here is the patient's Q&A history: {json.dumps(qna_history, ensure_ascii=False)}
Current keyword pool: {', '.join(prev_keywords)}
Main complaint: '{current_query}'

Task:
- Generate an ordered list  of the most important, specific, **single-word English keywords** for this patient's situation.
- Only output a comma-separated list of single words. Do NOT include confidence scores, sentences, or the original complaint.
- Do NOT include the original complaint or generic/diagnosis words.
- The first keyword should always be the most important.
- maximum {max_keywords} words.

Return ONLY the comma-separated keywords, nothing else. DO NOT REPEAT ANY WORD! Stop when you think there's enough keywords
"""

    result_expand = model.generate(prompt=prompt_expand)
    raw = result_expand["results"][0]["generated_text"].strip()

    # The prompt only asks for unique words and a maximum; enforce both here.
    keywords = _parse_keyword_reply(raw, current_query, max_keywords)
    print(f"[Keyword pool for this turn]: {keywords}")

    # Append this turn to the history file.
    all_hist = []
    if os.path.exists(history_save_path):
        try:
            with open(history_save_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            loaded = None
        if isinstance(loaded, list):
            all_hist = loaded
        else:
            print(f"[Warning] {history_save_path} is not a JSON list; starting a new history.")
    all_hist.append(keywords)
    with open(history_save_path, "w", encoding="utf-8") as f:
        json.dump(all_hist, f, indent=2, ensure_ascii=False)

    return keywords


In [32]:
#test
# def test_expand_keywords():
    
#     initial_query = "I feel tired and have trouble walking."
#     qna_history = [
#         {"question": "How often do you feel tired during the day?", "answer": "Almost every day."},
#         {"question": "Have you experienced any recent falls?", "answer": "Yes, twice last month."}
#     ]
    
#     prev_keywords = ["fatigue", "mobility"]
    
#     new_keywords = expand_keywords(initial_query, qna_history, prev_keywords, ai_params)
#     print("Expand result (ordered keywords):", new_keywords)

# if __name__ == "__main__":
#     test_expand_keywords()


In [33]:
def ai_select_question_main(
    candidate_main_questions, dialogue, ai_params,
    background_knowledge, patient_query
):
    """Ask the model which question to put to the patient next.

    Args:
        candidate_main_questions: Candidate questions; JSON-serialised into the prompt.
        dialogue: Q&A history so far; JSON-serialised into the prompt.
        ai_params: Dict with "encoded_credentials" (url + base64 apikey) and "space_id".
        background_knowledge: Reference text inserted into the prompt.
        patient_query: The patient's main concern.

    Returns:
        str: the model's reply, stripped. The prompt allows the model to write a
        new question, so the reply is not necessarily one of the candidates.
    """
    encoded = ai_params["encoded_credentials"]
    target_space = ai_params["space_id"]
    client = APIClient({
        "url": encoded["url"],
        "apikey": base64.b64decode(encoded["apikey"]).decode("ascii"),
    })
    selector = ModelInference(
        model_id="meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
        api_client=client,
        params={"max_new_tokens": 2000, "temperature": 0.1},
        space_id=target_space,
    )

    candidates_json = json.dumps(candidate_main_questions, ensure_ascii=False)
    history_json = json.dumps(dialogue, ensure_ascii=False)
    # One list entry per prompt line; whitespace inside the literals is significant.
    prompt_lines = [
        "You are an ALS expert assistant. ",
        "Use the following  patient's concern, background knowledge, candidate questions and dialogue history to select the most relevant and helpful question to ask next.",
        "The purpose of the questions is to assess the patient's quality of life. Therefore, it is crucial to ask the appropriate questions. You need to carefully evaluate whether these questions can truly address the patient's needs.",
        "If none of the candidate questions are suitable, generate a new, short and specific question.",
        "Do Not ASK The SAME QUESTION AGAIN!",
        "Candidate questions:",
        candidates_json,
        "",
        "Patient's main concern:",
        f"{patient_query}",
        "",
        "Dialogue history:",
        history_json,
        "",
        "Background knowledge:",
        f"{background_knowledge}",
        "",
        "",
        "Output ONLY the question text (no explanations, no formatting).",
        "",
    ]
    prompt = "\n".join(prompt_lines)

    reply = selector.generate(prompt=prompt)
    if isinstance(reply, dict) and reply.get("results"):
        return reply["results"][0]["generated_text"].strip()
    # Anything else (plain string or unexpected object) is used in its string form.
    return str(reply).strip()





In [34]:
def ai_should_ask_followup(followup_question, qna_history, ai_params):
    """Ask the model whether ``followup_question`` is worth asking.

    Args:
        followup_question: Candidate follow-up question text.
        qna_history: Q&A history so far; JSON-serialised into the prompt.
        ai_params: Dict with "encoded_credentials" (url + base64 apikey) and "space_id".

    Returns:
        bool: True when the model's reply, stripped and lower-cased, starts with "y".
    """
    encoded = ai_params["encoded_credentials"]
    target_space = ai_params["space_id"]
    client = APIClient({
        "url": encoded["url"],
        "apikey": base64.b64decode(encoded["apikey"]).decode("ascii"),
    })
    judge = ModelInference(
        model_id="meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
        api_client=client,
        params={"max_new_tokens": 300, "temperature": 0.1},
        space_id=target_space,
    )

    history_json = json.dumps(qna_history, ensure_ascii=False)
    prompt = (
        "Given the following Q&A history:\n"
        f"{history_json}\n"
        "The purpose of the questions is to assess the patient's quality of life.\n"
        f"Should we ask this follow-up question: '{followup_question}' to get more helpful information?\n"
        "Reply only with 'yes' or 'no'."
    )

    reply = judge.generate(prompt=prompt)
    # Structured reply -> first generated text; anything else -> its string form.
    if isinstance(reply, dict) and reply.get("results"):
        verdict = reply["results"][0]["generated_text"]
    else:
        verdict = str(reply)
    return verdict.strip().lower().startswith("y")

In [35]:
def generate_pre_help_with_rag(
    initial_query: str,
    qna_history: list,
    background_knowledge: str,
    ai_params: dict,
    max_items: int = 5
) -> dict:
    """
    Produce immediate, RAG-grounded help BEFORE asking any question.

    Args:
        initial_query: The patient's initial concern.
        qna_history: Q&A turns so far; only the last 6 are sent to the model.
        background_knowledge: Retrieved reference text the model must rely on.
        ai_params: Dict with "encoded_credentials" (url + base64 apikey) and "space_id".
        max_items: Maximum number of entries kept in each returned list.

    Returns:
        dict with list-valued keys "quick_advice", "self_checks", "resources"
        and "red_flags" (any extra keys the model returned are kept). Resources
        whose URL does not literally occur in ``background_knowledge`` are
        dropped, so invented links are never returned. When the reply contains
        no parseable JSON object a fixed fallback payload is returned.
    """
    import re  # kept local: the notebook's top import cell does not import re

    credentials = ai_params["encoded_credentials"]
    space_id = ai_params["space_id"]
    model_id = "meta-llama/llama-3-3-70b-instruct"

    api_client = APIClient({
        "url": credentials["url"],
        "apikey": base64.b64decode(credentials["apikey"]).decode("ascii")
    })
    model = ModelInference(
        model_id=model_id,
        api_client=api_client,
        params={
            "max_new_tokens": 280,
            "temperature": 0.2,
            "top_p": 0.9
        },
        space_id=space_id
    )

    # The key list is spelled out: the parser below relies on exactly these names.
    prompt = f"""
You are an ALS/MND assistant. Provide IMMEDIATE, PRACTICAL help based ONLY on BACKGROUND (RAG output).
DO NOT invent URLs. If BACKGROUND contains no URLs, set "resources": [].

Return ONLY a valid JSON object with exactly these keys (keep items short, concrete, at most {max_items} items per list):
{{
  "quick_advice": ["..."],
  "self_checks": ["..."],
  "resources": [{{"title": "...", "url": "https://..."}}],
  "red_flags": ["..."]
}}

BACKGROUND (RAG):
---
{background_knowledge}
---

INITIAL CONCERN:
{initial_query}

DIALOGUE SO FAR (may be empty):
{json.dumps(qna_history[-6:], ensure_ascii=False)}

Rules:
- No prose, no preambles. JSON object only.
- Advice must be conservative and non-diagnostic.
- If safety risk is possible, include a red flag item.
"""

    out = model.generate(prompt=prompt)
    text = out["results"][0]["generated_text"].strip()

    def _as_list(value):
        """Coerce a model-supplied field to a list (a bare string becomes one item)."""
        if isinstance(value, list):
            return value
        if isinstance(value, str) and value.strip():
            return [value.strip()]
        return []

    def _url_of(item):
        """Return the URL carried by a resource entry (dict with "url" or bare string)."""
        url = item.get("url") if isinstance(item, dict) else item
        return url.strip() if isinstance(url, str) else ""

    # Minimal JSON extraction (robust to extra tokens around the object).
    data = None
    m = re.search(r'\{.*\}', text, re.DOTALL)
    if m:
        try:
            data = json.loads(m.group())
        except ValueError:
            # Malformed JSON from the model: fall through to the fallback payload.
            data = None

    if isinstance(data, dict):
        background_text = background_knowledge or ""
        for key in ("quick_advice", "self_checks", "resources", "red_flags"):
            data[key] = _as_list(data.get(key))
        # Enforce the "no invented URLs" rule instead of trusting the model.
        data["resources"] = [
            item for item in data["resources"]
            if _url_of(item) and _url_of(item) in background_text
        ]
        for key in ("quick_advice", "self_checks", "resources", "red_flags"):
            data[key] = data[key][:max_items]
        return data

    # Fallback minimal payload
    return {
        "quick_advice": [
            "Keep a short symptom log (what/when/severity).",
            "Note triggers and what helps or worsens."
        ],
        "self_checks": [
            "Track sleep quality and daytime alertness.",
            "Record breathlessness episodes and activity context."
        ],
        "resources": [],
        "red_flags": []
    }


In [36]:
def pnm_score_single_dimension_rag(
    initial_query,
    qna_history,
    background_knowledge,
    ai_params,
    target_dimension=None
):
    """
    Score exactly ONE PNM dimension (0–7) using RAG.
    - If target_dimension is provided, score that dimension (matched
      case-insensitively against the fixed list; a reply for a different
      dimension is rejected).
    - Else, the model must choose a single most central dimension from the list.
    - Returns a dict:
      {
        "dimension": "<one of fixed list>",
        "score_0_7": int in 0..7,
        "confidence_0_1": float clamped to 0.0..1.0 (0.0 when not numeric),
        "rationale": str,
        "evidence_turn_ids": [ids of the enumerated Q&A turns sent to the model]
      }
    - If the reply cannot be parsed or fails validation (unknown dimension,
      score missing / non-integral / outside 0..7), a deterministic fallback
      with score 0 and confidence 0.0 is returned.
    """
    import re  # kept local: the notebook's top import cell does not import re

    credentials = ai_params["encoded_credentials"]
    space_id = ai_params["space_id"]
    model_id = "meta-llama/llama-4-maverick-17b-128e-instruct-fp8"

    api_client = APIClient({
        "url": credentials["url"],
        "apikey": base64.b64decode(credentials["apikey"]).decode("ascii")
    })
    model = ModelInference(
        model_id=model_id,
        api_client=api_client,
        params={"max_new_tokens": 5000, "temperature": 0.2, "top_p": 0.9},
        space_id=space_id
    )

    # Enumerate last up to 12 Q&A turns for evidence selection
    recent = qna_history[-12:]
    enumerated = [
        {"id": i, "q": qa.get("question", ""), "a": qa.get("answer", "")}
        for i, qa in enumerate(recent, 1)
    ]

    allowed_dims = [
        "Physiological","Safety","Love & Belonging","Esteem",
        "Self-Actualisation","Cognitive","Aesthetic","Transcendence"
    ]
    canonical_by_lower = {dim.lower(): dim for dim in allowed_dims}

    # Map the requested dimension onto its canonical spelling when possible.
    canonical_target = None
    if target_dimension:
        canonical_target = canonical_by_lower.get(str(target_dimension).strip().lower())
    prompt_target = canonical_target or target_dimension

    must_dim = f"\nYou MUST score this dimension only: {prompt_target}\n" if target_dimension else \
               "\nChoose the single MOST CENTRAL dimension from the list below and score that only.\n"

    prompt = f"""
You are an ALS/MND assistant. Use BACKGROUND (RAG) and DIALOGUE to score exactly ONE PNM dimension on a 0–7 scale.

Allowed dimensions:
- Physiological
- Safety
- Love & Belonging
- Esteem
- Self-Actualisation
- Cognitive
- Aesthetic
- Transcendence
{must_dim}

BACKGROUND (RAG):
---
{background_knowledge}
---

INITIAL CONCERN:
{initial_query}

DIALOGUE (enumerated, last up to 12 turns):
{json.dumps(enumerated, ensure_ascii=False)}

Output ONLY this strict JSON (no extra text):
{{
  "dimension": "<one from the allowed list>",
  "score_0_7": 0,
  "confidence_0_1": 0.80,
  "rationale": "≤20 words",
  "evidence_turn_ids": [1,3]
}}
Rules:
- If evidence is limited, pick the most conservative score and lower confidence.
- Keep rationale ≤20 words.
"""

    res = model.generate(prompt=prompt)
    text = res["results"][0]["generated_text"].strip()

    # Minimal JSON extraction
    parsed = None
    m = re.search(r'\{.*\}', text, re.DOTALL)
    if m:
        try:
            parsed = json.loads(m.group())
        except ValueError:
            # Malformed JSON from the model: use the fallback below.
            parsed = None

    if isinstance(parsed, dict):
        raw_dim = parsed.get("dimension")
        dimension = canonical_by_lower.get(raw_dim.strip().lower()) if isinstance(raw_dim, str) else None

        score = parsed.get("score_0_7")
        if isinstance(score, float) and score.is_integer():
            score = int(score)  # accept 5.0 as 5
        # bool is a subclass of int, so exclude it explicitly.
        score_ok = isinstance(score, int) and not isinstance(score, bool) and 0 <= score <= 7
        dim_ok = dimension is not None and (canonical_target is None or dimension == canonical_target)

        if score_ok and dim_ok:
            confidence = parsed.get("confidence_0_1")
            if isinstance(confidence, (int, float)) and not isinstance(confidence, bool):
                confidence = min(1.0, max(0.0, float(confidence)))
            else:
                confidence = 0.0

            raw_ids = parsed.get("evidence_turn_ids")
            evidence_ids = [
                turn_id for turn_id in (raw_ids if isinstance(raw_ids, list) else [])
                if isinstance(turn_id, int) and not isinstance(turn_id, bool)
                and 1 <= turn_id <= len(enumerated)
            ]

            rationale = parsed.get("rationale")
            out = dict(parsed)
            out["dimension"] = dimension
            out["score_0_7"] = score
            out["confidence_0_1"] = confidence
            out["rationale"] = rationale if isinstance(rationale, str) else ""
            out["evidence_turn_ids"] = evidence_ids
            return out

    # Fallback (deterministic, conservative)
    fallback_dim = canonical_target or "Physiological"
    return {
        "dimension": fallback_dim,
        "score_0_7": 0,
        "confidence_0_1": 0.0,
        "rationale": "Insufficient evidence.",
        "evidence_turn_ids": []
    }

In [37]:
def generate_structured_summary_with_rag(
    initial_query,
    qna_history,
    keywords_history,
    background_knowledge,
    ai_params,
    max_words=500,
    target_dimension=None
):
    """
    Minimal, RAG-grounded summary with single-dimension PNM scoring.

    Args:
        initial_query: The patient's initial concern.
        qna_history: Q&A turns; only the last 12 are sent to the model.
        keywords_history: List of keyword lists; only the latest one is used.
        background_knowledge: Retrieved reference text (may contain URLs).
        ai_params: Dict with "encoded_credentials" (url + base64 apikey) and "space_id".
        max_words: Word limit quoted to the model for the "summary" field.
        target_dimension: Forwarded to pnm_score_single_dimension_rag.

    Returns:
        dict that always contains "summary" (str), "key_concerns", "insights",
        "recommendations", "resources", "retrieval_prompts" (lists) and
        "pnm_scoring" (result of pnm_score_single_dimension_rag). Fields the
        model omitted or returned with the wrong type are replaced by defaults,
        and resources whose URL does not occur in ``background_knowledge`` are
        removed. When the reply holds no parseable JSON object a deterministic
        fallback summary is used.

    Side effects:
        Prints the final structure for debugging.
    """
    import re  # kept local: the notebook's top import cell does not import re

    credentials = ai_params["encoded_credentials"]
    space_id = ai_params["space_id"]
    model_id = "meta-llama/llama-4-maverick-17b-128e-instruct-fp8"

    api_client = APIClient({
        "url": credentials["url"],
        "apikey": base64.b64decode(credentials["apikey"]).decode("ascii")
    })
    model = ModelInference(
        model_id=model_id,
        api_client=api_client,
        params={"max_new_tokens": 3000, "temperature": 0.2, "top_p": 0.9},
        space_id=space_id
    )

    latest_keywords = keywords_history[-1] if keywords_history else []

    # 1) Ask model to produce structured JSON summary (links ONLY if present in BACKGROUND).
    #    The template is kept valid JSON (no comments) so the model is not nudged
    #    into emitting something json.loads rejects.
    prompt = f"""
You are an ALS/MND assistant. Use ONLY the BACKGROUND section for external links (do not invent URLs).
Summarize the multi-turn dialogue into a strict JSON with these exact keys:

{{
  "summary": "empathetic, concise paragraph <= {max_words} words",
  "key_concerns": ["short nouns/terms, 3-6 items"],
  "insights": ["short bullets, 2-6 items"],
  "recommendations": ["actionable, grounded in BACKGROUND, 3-6 items"],
  "resources": [{{"title":"...", "url":"https://..."}}],
  "retrieval_prompts": ["short queries for further retrieval, up to 5"]
}}

BACKGROUND (RAG output; may contain URLs):
---
{background_knowledge}
---

INITIAL CONCERN:
{initial_query}

DIALOGUE HISTORY (last 12 turns max):
{json.dumps(qna_history[-12:], ensure_ascii=False)}

LATEST KEYWORDS:
{json.dumps(latest_keywords, ensure_ascii=False)}

Rules:
- Output ONLY a valid JSON object, no extra text or code fences.
- "resources" may contain ONLY links present in BACKGROUND.
- Do NOT fabricate any URL. If no URLs exist in BACKGROUND, set "resources": [].
- Keep fields short and concrete.
"""

    result = model.generate(prompt=prompt)
    text = result["results"][0]["generated_text"].strip()

    summary_json = None
    m = re.search(r'\{.*\}', text, re.DOTALL)
    if m:
        try:
            summary_json = json.loads(m.group())
        except ValueError:
            # Malformed JSON from the model: use the deterministic fallback.
            summary_json = None

    default_summary = f"The patient reports: {initial_query}."
    if isinstance(summary_json, dict):
        # 2a) Fill in whatever the model left out so callers can rely on every key.
        if not isinstance(summary_json.get("summary"), str):
            summary_json["summary"] = default_summary
        for key in ("key_concerns", "insights", "recommendations", "resources", "retrieval_prompts"):
            if not isinstance(summary_json.get(key), list):
                summary_json[key] = []

        # Enforce the "no invented URLs" rule instead of trusting the model.
        background_text = background_knowledge or ""
        grounded = []
        for item in summary_json["resources"]:
            url = item.get("url") if isinstance(item, dict) else item
            if isinstance(url, str) and url.strip() and url.strip() in background_text:
                grounded.append(item)
        summary_json["resources"] = grounded
    else:
        # 2b) If summary failed, make a small deterministic fallback
        summary_json = {
            "summary": default_summary,
            "key_concerns": latest_keywords[:5],
            "insights": [],
            "recommendations": ["We will ask a few targeted questions and provide practical resources."],
            "resources": [],
            "retrieval_prompts": [f"ALS {k} practical guide" for k in latest_keywords[:3]]
        }

    # 3) Single-dimension PNM scoring (separate RAG AI call)
    pnm_result = pnm_score_single_dimension_rag(
        initial_query=initial_query,
        qna_history=qna_history,
        background_knowledge=background_knowledge,
        ai_params=ai_params,
        target_dimension=target_dimension
    )
    # attach to summary
    summary_json["pnm_scoring"] = pnm_result

    # Print for debug and return
    print("[Structured summary]:\n", json.dumps(summary_json, indent=2, ensure_ascii=False))
    return summary_json

In [38]:
def print_main_candidates(candidate_main_list):
    """Debug helper: print the candidate list with a blank line before and after it."""
    print("", candidate_main_list, "", sep="\n")


In [39]:
def smart_rag_multi_turn(ai_params, question_bank, max_turns=7, max_total_qa=15):
    """Run the interactive multi-turn interview and write its artefacts to disk.

    Flow per turn: search the question index with every keyword in the pool,
    let the model pick (or write) the next question, ask it, optionally ask up
    to three follow-ups from the matched question-bank entry, then refresh the
    keyword pool from the answer.

    Typing 'quit', 'exit', 'stop' or 'done' (any case) as an answer ends the
    interview immediately; that input is not recorded as an answer and triggers
    no follow-ups or keyword expansion.

    Args:
        ai_params: Dict with "encoded_credentials" (url + base64 apikey) and "space_id".
        question_bank: Entries searched by find_question_by_main.
        max_turns: Maximum number of main questions.
        max_total_qa: Maximum number of recorded Q&A pairs (main + follow-up).

    Returns:
        list[dict]: the recorded turns, each ``{"question": ..., "answer": ...}``.

    Side effects:
        Reads from stdin, prints progress, and writes qna_history.json,
        keywords_history.json and structured_summary.json in the working directory.
    """
    exit_words = ('quit', 'exit', 'stop', 'done')

    credentials = ai_params["encoded_credentials"]
    api_client = APIClient({
        "url": credentials["url"],
        "apikey": base64.b64decode(credentials["apikey"]).decode("ascii")
    })
    vector_index_id = "8ef82ae7-0818-4821-8d0f-f6a7ca3234d8"
    background_vector_index_id = "b3c5ee54-0637-487c-b9d7-e805713a1724"
    space_id = ai_params["space_id"]
    api_client.set.default_space(space_id)

    initial_query = input("Enter the patient's initial concern: ").strip()
    qna_history = []
    keywords_history = []
    # Retrieved on first use only: the query (initial concern) never changes.
    background_knowledge = None

    keywords_pool = expand_keywords(initial_query, [], [], ai_params)
    keywords_history.append(keywords_pool)
    print(f"[Keywords after initialization]: {keywords_pool}")

    used_main_questions = set()
    user_quit = False

    for turn in range(max_turns):
        if not keywords_pool or len(qna_history) >= max_total_qa:
            print("[AI] No new keywords or Q&A limit reached, conversation finished.")
            break

        print(f"[Current keyword pool, turn {turn+1}]: {keywords_pool}")

        candidate_main_list = []
        for kw in keywords_pool:
            main_questions = remote_vector_search(kw, api_client, vector_index_id, space_id)
            for main_q in main_questions:
                if main_q and main_q not in used_main_questions and main_q not in candidate_main_list:
                    candidate_main_list.append(main_q)

        print_main_candidates(candidate_main_list)

        if not candidate_main_list:
            print("[AI] No new main questions to ask, conversation finished.")
            break

        if background_knowledge is None:
            background_knowledge = retrieve_background_knowledge(
                initial_query, api_client, background_vector_index_id, space_id
            )

        best_main_question = ai_select_question_main(
            candidate_main_list, qna_history, ai_params,
            background_knowledge=background_knowledge,
            patient_query=initial_query
        )
        used_main_questions.add(best_main_question)

        q_full = find_question_by_main(best_main_question, question_bank)
        full_question = best_main_question if not q_full else q_full.get("Prompt_Main")
        options = [] if not q_full else q_full.get("options", [])

        print(f"\nQ{turn+1}: {full_question}")
        if options:
            print("Options: " + ", ".join(options))
        answer = input("Your answer: ").strip()
        # Check for an exit word BEFORE recording, so it is never treated as an answer.
        if answer.lower() in exit_words:
            break
        qna_history.append({"question": full_question, "answer": answer})
        used_main_questions.add(full_question)

        if len(qna_history) >= max_total_qa:
            print("[AI] Q&A limit reached, conversation finished.")
            break

        # Follow-ups from the matched question-bank entry (up to three).
        if q_full:
            for j in range(1, 4):
                fu_obj = q_full.get(f"Prompt_Followup{j}", {})
                fu_q = fu_obj.get("question") if isinstance(fu_obj, dict) else ""
                if fu_q and fu_q not in used_main_questions:
                    if ai_should_ask_followup(fu_q, qna_history, ai_params):
                        print(f"Follow-up: {fu_q}")
                        fu_ans = input("Your answer: ").strip()
                        if fu_ans.lower() in exit_words:
                            user_quit = True
                            break
                        qna_history.append({"question": fu_q, "answer": fu_ans})
                        used_main_questions.add(fu_q)
                        keywords_pool = expand_keywords(fu_ans, qna_history, keywords_pool, ai_params)
                        keywords_history.append(keywords_pool)
                        if len(qna_history) >= max_total_qa:
                            print("[AI] Q&A limit reached, conversation finished.")
                            break
        if user_quit:
            break

        keywords_pool = expand_keywords(answer, qna_history, keywords_pool, ai_params)
        keywords_history.append(keywords_pool)

    # Save Q&A and keyword histories
    with open("qna_history.json", "w", encoding="utf-8") as f:
        json.dump(qna_history, f, indent=2, ensure_ascii=False)
    with open("keywords_history.json", "w", encoding="utf-8") as f:
        json.dump(keywords_history, f, indent=2, ensure_ascii=False)

    print("\n[Full Q&A history saved to qna_history.json]")
    print("\n[Keyword history saved to keywords_history.json]")

    summary = generate_structured_summary_with_rag(
        initial_query, qna_history, keywords_history, background_knowledge or "", ai_params
    )
    with open("structured_summary.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)
    print("\n[Summary with recommendations and resources saved to structured_summary.json]")
    print("\n[Structured summary]:")
    print(json.dumps(summary, indent=2, ensure_ascii=False))
    return qna_history


In [40]:
#main
if __name__ == "__main__":
    # Run the interactive session, then echo the recorded transcript.
    dialogue = smart_rag_multi_turn(ai_params, question_bank, max_turns=7)
    for qa in dialogue:
        print("Q: " + str(qa['question']), "A: " + str(qa['answer']), sep="\n")

[Keyword pool for this turn]: ['speech', 'communication', 'swallowing', 'dysphagia', 'voice', 'als', 'mnd', 'symptoms']
[Keywords after initialization]: ['speech', 'communication', 'swallowing', 'dysphagia', 'voice', 'als', 'mnd', 'symptoms']
[Current keyword pool, turn 1]: ['speech', 'communication', 'swallowing', 'dysphagia', 'voice', 'als', 'mnd', 'symptoms']

['ID: PROMPT-057', 'Class: Voice preservation', 'Subclass: nan', 'Topic: Voice preservation - nan', 'PNM Category: Esteem', 'Patterns: Voice preservation', 'Main Question', "Have you considered preserving your voice (for example, voice banking) while it's still strong?", 'Follow-up 1', 'Question: If yes, have you started the process of recording your voice for future use?', 'Type: yesno', 'Options: (none)', 'Follow-up 2', 'Question: How do you feel about using technology to communicate if speaking becomes difficult?', 'Type: multiple', 'Options: I’m comfortable with the idea of using communication devices, I have mixed feeling