<a href="https://colab.research.google.com/github/KusalaniR/MedGen.AI/blob/main/notebooks/chatbot_module.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import google.generativeai as genai
from google.colab import userdata
from google.api_core.exceptions import ResourceExhausted
import textwrap


In [3]:
# Supported languages
SUPPORTED_LANGUAGES = ["en", "si"]

In [4]:
#Language Detection Function
# ---------------------------------------------
# Detect user language (English / Sinhala)
# ---------------------------------------------

def detect_language(text):
    """
    Detects Sinhala vs English based on Unicode range.
    Sinhala characters range: \u0D80 - \u0DFF
    """
    for ch in text:
        if '\u0D80' <= ch <= '\u0DFF':
            return "si"
    return "en"


In [5]:
# ---------------------------------------------
# Sinhala medical question → English intent
# ---------------------------------------------

def sinhala_to_english_question(text):
    """
    Converts common Sinhala medical questions into English.
    Rule-based (safe & FYP-appropriate).
    """

    text = text.lower()

    if "mch" in text and "ඉහළ" in text:
        return "Why is my MCH high?"

    if "cholesterol" in text or "කොලෙස්ටරෝල්" in text:
        return "Is my cholesterol normal?"

    if "rdw" in text:
        return "What does RDW mean?"

    if "රතු රුධිර" in text:
        return "What do my red blood cell results mean?"

    return "Explain my blood report"


In [6]:
#Translation Function
# ---------------------------------------------
# Translate English → Sinhala (fallback version)
# ---------------------------------------------

def translate_to_sinhala(text):
    """
    Clean + medical-safe Sinhala translation
    """

    replacements = {
        "Based on the report provided,": "ඔබගේ රුධිර වාර්තාව අනුව",
        "there are two separate MCH test results listed,":
            "MCH පරීක්ෂණය සඳහා අගයන් දෙකක් සටහන් වී ඇත",
        "High": "ඉහළ",
        "Low": "අඩු",
        "Normal": "සාමාන්‍ය",
        "Mean Corpuscular Hemoglobin":
            "Mean Corpuscular Hemoglobin (MCH)",
        "red blood cells": "රතු රුධිර කණිකා",
        "This information is educational only.":
            "මෙය අධ්‍යාපනික විස්තරයක් පමණි.",
        "Please consult your healthcare provider.":
            "කරුණාකර වෛද්‍යවරයෙකු හමුවන්න.",
        "I cannot provide a medical diagnosis.":
            "මෙය වෛද්‍ය නිශ්චයයක් නොවේ."
    }

    for en, si in replacements.items():
        text = text.replace(en, si)

    return text




In [7]:


from google.colab import drive
drive.mount("/content/drive")

# Load extracted OCR + Gemini results (RAG source 1)
final_df = pd.read_csv(
    "/content/drive/MyDrive/MedGen.AI Datasets/FINALIZED DATASETS/OCR/extracted_report_results.csv"
)

final_df.head()


Mounted at /content/drive


Unnamed: 0,test_name,value,status,language,explanation
0,RBC,437.0,Unknown,si,\nTest: RBC\nValue: 437.0\nStatus: Unknown\n\n...
1,MCV,92.0,Normal,si,\nTest: MCV\nValue: 92.0\nStatus: Normal\n\nEx...
2,MCH,287.0,High,si,\nTest: MCH\nValue: 287.0\nStatus: High\n\nExp...
3,MCH,322.0,High,si,\nTest: MCH\nValue: 322.0\nStatus: High\n\nExp...
4,RDW,8.0,Low,si,\nTest: RDW\nValue: 8.0\nStatus: Low\n\nExplan...


In [8]:
#Load medical Knowledge (RAG source 2)
knowledge_df = pd.read_csv(
    "/content/drive/MyDrive/MedGen.AI Datasets/FINALIZED DATASETS/blood_test_knowledge.csv",
    encoding="latin1"
)

knowledge_df.head()


Unnamed: 0,test_name,normal_range,unit,low_meaning,high_meaning,simple_explanation_en
0,Hemoglobin,"1215.5 (Female), 13.517.5 (Male)",g/dL,May indicate low oxygen-carrying capacity,May indicate dehydration,Hemoglobin is a protein in red blood cells tha...
1,Hematocrit,"3646% (Female), 4153% (Male)",%,May indicate anemia,May indicate dehydration,Hematocrit shows the percentage of red blood c...
2,WBC Count,4.010.0,K/uL,May reduce ability to fight infections,May indicate infection or inflammation,White blood cells help your body fight infecti...
3,Red Blood Cells,"4.25.4 (Female), 4.76.1 (Male)",m/uL,May reduce oxygen delivery,May thicken blood,Red blood cells carry oxygen from your lungs t...
4,Platelet Count,150400,K/uL,May increase bleeding risk,May increase clotting risk,Platelets help your blood to clot and stop ble...


In [23]:
# ---------------------------------------------
# Rule-based Sinhala medical explanations
# ---------------------------------------------

def sinhala_medical_explanation(test, status, knowledge_row):
    """
    Generates SAFE Sinhala explanation without AI translation
    """

    base = knowledge_row["simple_explanation_en"]

    # Sinhala base explanations
    base_si = {
        "MCH": "MCH යනු රතු රුධිර කණිකා තුළ ඇති හීමෝග්ලොබින් ප්‍රමාණය මැනීමයි.",
        "MCV": "MCV යනු රතු රුධිර කණිකා වල සාමාන්‍ය ප්‍රමාණය පෙන්වයි.",
        "RDW": "RDW යනු රතු රුධිර කණිකා වල ප්‍රමාණ වෙනස් වීම පෙන්වයි.",
        "Platelet Count": "පලට්ලට් යනු රුධිරය කැටි වීමට උපකාරී වන කණිකා වේ.",
        "Cholesterol": "කොලෙස්ටරෝල් යනු ශරීරයට අවශ්‍ය තෙල් වර්ගයකි."
    }

    meaning_si = {
        "High": "මෙම අගය සාමාන්‍යයට වඩා ඉහළයි.",
        "Low": "මෙම අගය සාමාන්‍යයට වඩා අඩුයි.",
        "Normal": "මෙම අගය සාමාන්‍ය සීමාව තුළ ඇත."
    }

    explanation = f"""
{base_si.get(test, test + " රුධිර පරීක්ෂාවකි.")}

{meaning_si.get(status, "")}

මෙය අධ්‍යාපනික විස්තරයක් පමණක් වන අතර,
නිශ්චිත වෛද්‍ය උපදෙස් සඳහා වෛද්‍යවරයෙකු හමුවන්න.
"""

    return explanation.strip()


In [24]:
# -------------------------------
# Gemini setup (Chatbot module)
# -------------------------------
import google.generativeai as genai
from google.colab import userdata

# Configure API key
genai.configure(api_key=userdata.get("GEMINI_API_KEY"))

# Create Gemini model instance
model = genai.GenerativeModel("models/gemini-flash-lite-latest")

In [25]:
def fallback_chatbot_response(user_question, final_df):
    """
    Rule-based fallback when Gemini quota is exceeded.
    """
    for _, row in final_df.iterrows():
        if row["test_name"].lower() in user_question.lower():
            return (
                f"{row['test_name']} result is {row['value']} "
                f"and the status is {row['status']}. "
                "Please consult a doctor for more details."
            )
    return "I can only answer questions related to your blood report."


In [26]:
#Prepare chatbot context
#because Tell Gemini: These are THIS USER’S test results. Answer only based on this
# -------------------------------
# Build chatbot medical context
# -------------------------------

# def build_report_context(final_df):
#     """
#     Converts extracted blood test results into
#     a readable medical context for the chatbot.
#     """
#     context = "User Blood Test Summary:\n\n"

#     for _, row in final_df.iterrows():
#         context += (
#             f"- Test: {row['test_name']}\n"
#             f"  Value: {row['value']}\n"
#             f"  Status: {row['status']}\n"
#             f"  Explanation: {row['explanation']}\n\n"
#         )

#     return context



# ---------------------------------------------
# Build report context from final_df
# ---------------------------------------------

def build_report_context(final_df):
    """
    Converts final_df into readable medical context
    """
    context = ""

    for _, row in final_df.iterrows():
        context += (
            f"Test: {row['test_name']}, "
            f"Value: {row['value']}, "
            f"Status: {row['status']}. "
        )

    return context



In [27]:
#Create chatbot prompt (RAG logic)
# def chatbot_prompt(user_question, report_context):
#     """
#     Builds a safe medical chatbot prompt using RAG.
#     """
#     return f"""
# You are a medical report assistant.

# You MUST answer ONLY using the user's blood test report below.
# If the question is unrelated, politely say you cannot answer.

# User Report:
# {report_context}

# User Question:
# {user_question}

# Rules:
# - No diagnosis
# - No medicines
# - Simple, friendly language
# - Educational only
# """

def chatbot_prompt(user_question, report_context):
    """
    Safer + friendlier medical chatbot prompt
    """
    return f"""
You are a medical assistant chatbot.

Below is the patient's blood test report:
{report_context}

User Question:
{user_question}

Rules:
- Explain what the result means in general terms
- Do NOT give a diagnosis
- Do NOT prescribe medicine
- You MAY mention common general reasons (educational only)
- Use simple, patient-friendly language
- End by advising to consult a doctor
"""



In [29]:
#Gemini chatbot function
# def chatbot_response(user_question, final_df):
#     """
#     Generates chatbot response using report context.
#     Falls back to rule-based response if Gemini fails.
#     """

#     report_context = build_report_context(final_df)
#     prompt = chatbot_prompt(user_question, report_context)

#     try:
#         response = model.generate_content(prompt)
#         return response.text

#     except Exception as e:
#         # Fallback (VERY important for grading)
#         return (
#             "I am unable to use AI right now, but based on your report, "
#             "please review the test values and their status. "
#             "This explanation is educational only."
#         )



# ---------------------------------------------
# Bilingual Medical Chatbot
# ---------------------------------------------

def chatbot_response(user_question, final_df):
    """
    Answers user questions in English or Sinhala.
    """

    # 1. Detect user language
    user_lang = detect_language(user_question)

    # 2. Build report context (used for English + Gemini)
    report_context = build_report_context(final_df)

    # ---------------------------------
    # SINHALA PATH (RULE-BASED)
    # ---------------------------------
    if user_lang == "si":

        for _, row in final_df.iterrows():
            if row["test_name"].lower() in user_question.lower():

                # Get medical knowledge row
                knowledge_row = knowledge_df[
                    knowledge_df["test_name"] == row["test_name"]
                ].iloc[0]

                return sinhala_medical_explanation(
                    row["test_name"],
                    row["status"],
                    knowledge_row
                )

        return "මෙම ප්‍රශ්නයට අදාල රුධිර පරීක්ෂා තොරතුරු නොමැත."

    # ---------------------------------
    # ENGLISH PATH (GEMINI)
    # ---------------------------------
    try:
        prompt = chatbot_prompt(user_question, report_context)
        response = model.generate_content(prompt)
        return response.text

    except:
        return fallback_chatbot_response(user_question, final_df)






In [14]:
#Test english
print(chatbot_response("Why is my MCH high?", final_df))


Based on the report provided, there are two entries for **MCH (Mean Corpuscular Hemoglobin)**, and both show a **High** status:

*   Test: MCH, Value: 287.0, Status: High.
*   Test: MCH, Value: 322.0, Status: High.

**As a medical assistant, I can only report the findings from the lab results you provided. I cannot provide a medical diagnosis or explain the specific reason *why* your MCH is high.**

Elevated MCH values indicate that your red blood cells contain more hemoglobin than average. To understand the clinical significance of this result and determine the underlying cause, you must discuss these findings with your healthcare provider. They will interpret these results in the context of your complete medical history and other lab values.


In [30]:
#Test sinhala
print(chatbot_response("මගේ MCH ඉහළ ඇයි?", final_df))


MCH යනු රතු රුධිර කණිකා තුළ ඇති හීමෝග්ලොබින් ප්‍රමාණය මැනීමයි.

මෙම අගය සාමාන්‍යයට වඩා ඉහළයි.

මෙය අධ්‍යාපනික විස්තරයක් පමණක් වන අතර,
නිශ්චිත වෛද්‍ය උපදෙස් සඳහා වෛද්‍යවරයෙකු හමුවන්න.


In [32]:
print(chatbot_response("Is my cholesterol normal?", final_df))


Hello! I see you have a question about your cholesterol result.

Based on the report you provided:

Your **Cholesterol** value is **194.0**, and the status is marked as **Normal**.

In general terms, cholesterol is a waxy, fat-like substance found in all your cells. Your body needs some cholesterol to make hormones, vitamin D, and help with digestion. A "normal" range generally means your level is within a range that is not typically associated with an immediate increased risk of heart issues related to high cholesterol.

Sometimes, dietary choices (like eating a lot of saturated or trans fats) or lifestyle factors can influence cholesterol levels, but your result here is within the usual expected range.

Since this is just one part of your overall blood work, it is very important to discuss all these results with your healthcare provider. They can look at everything together and give you the best advice based on your complete health picture.

**Please make sure to consult with your do

In [33]:
print(chatbot_response("මගේ කොලෙස්ටරෝල් සාමාන්‍යද?", final_df))


මෙම ප්‍රශ්නයට අදාල රුධිර පරීක්ෂා තොරතුරු නොමැත.


In [34]:
print(chatbot_response("RDW low කියන්නේ මොකක්ද?", final_df))


RDW යනු රතු රුධිර කණිකා වල ප්‍රමාණ වෙනස් වීම පෙන්වයි.

මෙම අගය සාමාන්‍යයට වඩා අඩුයි.

මෙය අධ්‍යාපනික විස්තරයක් පමණක් වන අතර,
නිශ්චිත වෛද්‍ය උපදෙස් සඳහා වෛද්‍යවරයෙකු හමුවන්න.
