In [None]:
# Read the 'OPENAI_API_KEY' secret through google.colab.userdata.
# NOTE(review): none of the visible cells below reads the OPENAI_API_KEY variable;
# the LLM helpers call userdata.get('OPENAI_API_KEY') themselves. Confirm whether
# this module-level copy is still needed.
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

In [None]:
from typing import List, Optional
from pydantic import BaseModel


# Patient identity block of a report (the `patient` field of MedicalReport).
class Patient(BaseModel):
    name: str
    age: int  # NOTE(review): presumably age in years; confirm against the source JSON
    sex: str  # plain string; no fixed set of values is enforced here


# One laboratory result row (element of MedicalReport.labs).
# Every field is declared as a string, including the measured value.
class Lab(BaseModel):
    category: str
    test_name: str
    value: str  # kept as text rather than a numeric type
    unit: str
    reference_range: str
    flag: str


# Cardiovascular-risk summary section (the `cvd_summary` field of MedicalReport).
class CVDSummary(BaseModel):
    five_year_risk_percent: float  # NOTE(review): presumably on a 0-100 scale; confirm with the data producer
    risk_level: str
    interpretation: str
    modifiable_risk_factors: List[str]
    risk_reduction_advice: List[str]


# Lifestyle details nested inside Assessment; each aspect is a free-text string.
class Lifestyle(BaseModel):
    smoking: str
    alcohol: str
    diet: str
    physical_activity: str


# Assessment section (the `assessment` field of MedicalReport).
class Assessment(BaseModel):
    summary: str
    family_history: str
    lifestyle: Lifestyle  # nested Lifestyle model, validated recursively by pydantic


# One entry of the care plan (element of MedicalReport.plan).
class PlanItem(BaseModel):
    advice: str
    kb_resource_id: str  # NOTE(review): presumably refers to a KnowledgeBaseItem.id; no visible cell resolves it


# One warning item (element of MedicalReport.red_flags): a symptom plus a note about it.
class RedFlag(BaseModel):
    symptom: str
    note: str


# One row of the report's resources table (element of MedicalReport.resources_table).
class Resource(BaseModel):
    category: str  # the distinct values of this field decide which knowledge-base categories get a generated report
    title: str
    url: str

# A generated per-category guide stored on the report (element of MedicalReport.category_reports).
# Built from the dict returned by generate_category_report, so it carries the same
# three keys as the CategoryReport model defined in a later cell.
class CategoryReportItem(BaseModel):
    category: str
    text: str
    sources: List[str]  # source URLs listed for the guide


# Top-level schema of a patient report JSON file.
# All fields are required (none has a default).
# NOTE(review): `category_reports` is required too, while the three-argument
# read_medical_report defined in a later cell injects 'category_reports' into the raw
# dict before validating. Confirm whether the raw input files already contain this key.
class MedicalReport(BaseModel):
    patient: Patient
    labs: List[Lab]
    cvd_summary: CVDSummary
    assessment: Assessment
    plan: List[PlanItem]
    red_flags: List[RedFlag]
    resources_table: List[Resource]
    category_reports: List[CategoryReportItem]
    disclaimer: str

In [None]:
import json
from pathlib import Path


def read_medical_report(file_path: str) -> MedicalReport:
    """Load a report JSON file and validate it as a ``MedicalReport``.

    Note: a later cell in this notebook defines ``read_medical_report`` again,
    so after that cell has run this name refers to the later definition.

    Args:
        file_path: Path to a JSON file whose top-level object matches the
            ``MedicalReport`` schema.

    Returns:
        The validated ``MedicalReport`` instance.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        pydantic.ValidationError: If the JSON does not satisfy the schema.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return MedicalReport(**data)

# Load and validate the sample report stored on Google Drive.
# NOTE(review): assumes Drive is already mounted under /content/drive; no mount cell is visible here.
data = read_medical_report('/content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/kushagra_mandwal.json')

In [None]:
def get_unique_categories(report: MedicalReport) -> List[str]:
    """Return the distinct resource categories of a report, in first-seen order.

    Args:
        report: Report whose ``resources_table`` entries each expose a
            ``category`` attribute.

    Returns:
        Each category exactly once, ordered by first appearance in
        ``report.resources_table``. Empty when the table is empty.
    """
    # dict.fromkeys de-duplicates while keeping insertion order. A plain set()
    # would give a different order from one kernel session to the next, which
    # makes downstream per-category output order non-reproducible.
    return list(dict.fromkeys(resource.category for resource in report.resources_table))

# Distinct resource categories of the loaded report; iterated by the report-generation loop in a later cell.
unique_categories = get_unique_categories(data)

In [None]:
# Display the category list as the cell output.
unique_categories

['weight_management', 'alcohol', 'healthy_eating', 'blood_pressure']

In [None]:
# One entry of the knowledge-base metadata JSON (see read_knowledge_base).
# Only file_name, title, category and source_url are read by the visible cells;
# the remaining fields are validated but not used further here.
class KnowledgeBaseItem(BaseModel):
    file_name: str  # joined onto the knowledge-base directory to locate the document
    id: str
    title: str  # used as the heading of the document in the assembled content
    category: str  # compared for equality with the requested category
    applies_to: List[str]
    summary_length_words: int
    source_url: str  # emitted as the "Source:" line of the document
    verified_source: bool
    last_updated: str
    status: str


def read_knowledge_base(file_path: str) -> List[KnowledgeBaseItem]:
    """Load the knowledge-base metadata file.

    Args:
        file_path: Path to a JSON file containing a list of objects, each
            matching the ``KnowledgeBaseItem`` schema.

    Returns:
        One validated ``KnowledgeBaseItem`` per entry, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        pydantic.ValidationError: If an entry does not satisfy the schema.
    """
    # Decode explicitly as UTF-8 so titles and other text in the metadata are
    # read the same way regardless of the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return [KnowledgeBaseItem(**item) for item in data]

In [None]:
# List the knowledge-base directory (markdown documents plus metadata.json) as a sanity check.
!ls /content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/knowledgebase

alcohol_nhmrc.md
alcohol_support_fare.md
blood_pressure_stroke_highbp_fact_sheet.md
first_nations_at_si_healthy_eating_guide.md
first_nations_quitline_aboriginal.md
healthy_eating_aghe.md
healthy_eating_heart_healthy_body_weight.md
healthy_eating_heartpattern.md
healthy_eatingquiz_home.md
metadata.json
weight_management_weightloss_healthylifestyle.md


In [None]:
def get_category_content(category: str, kb_path: str, metadata_path: str) -> str:
    """Concatenate all knowledge-base documents that belong to one category.

    The metadata file is re-read on every call. Each matching document is
    rendered as a heading with its title, a ``Source:`` line with its URL,
    the document text, and a ``---`` separator.

    Args:
        category: Category name; compared for exact equality with each
            metadata entry's ``category``.
        kb_path: Directory that contains the documents named in the metadata.
        metadata_path: Path to the metadata JSON file.

    Returns:
        The rendered documents joined by newlines, or an empty string when no
        metadata entry has the requested category.

    Raises:
        OSError: If the metadata file or a listed document cannot be opened.
    """
    knowledge_base = read_knowledge_base(metadata_path)

    content_parts = []
    for item in knowledge_base:
        if item.category != category:
            continue
        file_path = f"{kb_path}/{item.file_name}"
        # The documents contain non-ASCII characters (curly quotes, bullets),
        # so decode explicitly as UTF-8 instead of the locale default.
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        content_parts.append(f"# {item.title}\n\nSource: {item.source_url}\n\n{text}\n\n---\n")

    return "\n".join(content_parts)

# Knowledge-base directory and its metadata file; both globals are reused by later cells.
kb_path = "/content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/knowledgebase"
metadata_path = "/content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/knowledgebase/metadata.json"
# Smoke test on a single category. `content` is rebound by the generation loop in a later cell.
content = get_category_content("weight_management", kb_path, metadata_path)

In [None]:
# Display the assembled knowledge-base text for the category as the cell output.
content

'# Weight loss - a healthy approach\n\nSource: https://www.betterhealth.vic.gov.au/health/healthyliving/weight-loss-a-healthy-approach\n\nHere is a plain-language summary of the “Weight loss – a healthy approach” fact sheet from the Better Health Channel (Victoria):\n\n⸻\n\nBackground\n\t•\tBeing overweight or having too much body fat increases the risk of health problems such as coronary heart disease and type 2 diabetes.  ￼\n\t•\tMany people try fad diets or “quick fixes,” but although they may work while you stick with them, weight is often regained once usual eating resumes.  ￼\n\t•\tRepeated cycles of losing and regaining weight (yo-yo dieting) can impair metabolism (the body burns fewer calories) and may itself raise heart disease risk.  ￼\n\n⸻\n\nMain Recommendations & Healthy Approach\n\n1. Go slowly and sustainably\n\t•\tA healthy pace of weight loss is about ½ to 1 kg per week.  ￼\n\t•\tMaking small, realistic changes you can maintain long term is better than extreme diets.  

In [None]:
!pip install langchain_openai langchain

Collecting langchain_openai
  Downloading langchain_openai-0.3.35-py3-none-any.whl.metadata (2.4 kB)
Downloading langchain_openai-0.3.35-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_openai
Successfully installed langchain_openai-0.3.35


In [None]:
from google.colab import userdata
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field
import os


# Output schema handed to JsonOutputParser in generate_category_report.
# NOTE(review): the parser's format instructions are derived from this model, so the
# Field descriptions below (and any class docstring, which is why a comment is used here)
# presumably end up in the prompt text; treat them as prompt content when editing.
class CategoryReport(BaseModel):
    category: str = Field(description="The category of the report")
    text: str = Field(description="The one-page friendly report text with inline source links")
    sources: List[str] = Field(description="List of source URLs used")


def generate_category_report(category_content: str, category: str) -> dict:
    """Ask the chat model for a friendly one-page summary of one category.

    Sets ``OPENAI_API_KEY`` in ``os.environ`` from the Colab secret store,
    sends a system + human message pair to ``gpt-4o`` and parses the reply
    with a ``JsonOutputParser`` built around ``CategoryReport``.

    Args:
        category_content: Knowledge-base text to summarise.
        category: Category name, interpolated into the request.

    Returns:
        Whatever the JSON parser extracts from the model reply; the format
        instructions request the ``CategoryReport`` fields.
    """
    # The key is exported before the client is constructed.
    os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
    chat_model = ChatOpenAI(model="gpt-4o")
    report_parser = JsonOutputParser(pydantic_object=CategoryReport)

    system_prompt = """You are a helpful health information assistant.
    Create a friendly, one-page report summarizing the provided content.
    Keep the tone warm and encouraging.
    Include inline links in the text using markdown format [text](url).
    Also provide a separate list of all source URLs at the end."""

    # The parser's format instructions go last so the reply can be parsed as JSON.
    format_instructions = report_parser.get_format_instructions()
    user_prompt = f"""Based on the following content about {category},
    create a one-page friendly summary report.
    Include inline source links in markdown format throughout the text.
    Extract and list all source URLs separately.

    Content:
    {category_content}

    {format_instructions}"""

    conversation = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_prompt),
    ]
    reply = chat_model.invoke(conversation)
    return report_parser.parse(reply.content)

In [None]:
# Trial run: generate the report for one category from the `content` assembled above (makes one LLM call).
temp = generate_category_report(content, "weight_management")

In [None]:
# Display the parsed trial report as the cell output.
temp

{'category': 'weight_management',
 'text': "Losing weight can be a rewarding journey towards better health and well-being. It's important to adopt a healthy, balanced approach that you can maintain over the long term, as highlighted in several guides. For instance, the [Better Health Channel](https://www.betterhealth.vic.gov.au/health/healthyliving/weight-loss-a-healthy-approach) suggests losing weight slowly at about ½ to 1 kg per week and emphasizes sustainable lifestyle changes over fad diets, which often lead to yo-yo dieting and can harm metabolism. Understanding the balance between the energy you consume and the energy you expend is key. Keeping a food diary and tracking physical activities can offer insights into your habits and help adjust behaviors for better outcomes.\n\nBuilding a plan with [SMART goals](https://www.betterhealth.vic.gov.au/health/healthyliving/weight-loss-a-healthy-approach) — being Specific, Measurable, Achievable, Realistic, and Time-bound — is crucial in 

In [None]:
from tqdm.notebook import tqdm

# Generate one report per category (one LLM call each), collecting the parsed dicts.
# NOTE(review): the three-argument read_medical_report in the next cell runs the same
# loop again, and `reports` is not read by any later visible cell. Confirm whether this
# cell is still needed, since it repeats the LLM calls.
reports = []
for category in tqdm(unique_categories):
    # Rebinds the module-level `content` that an earlier cell set for "weight_management".
    content = get_category_content(category, kb_path, metadata_path)
    report = generate_category_report(content, category)
    reports.append(report)

  0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
from tqdm.notebook import tqdm
import json
from pathlib import Path


def read_medical_report(
    file_path: str,
    kb_path: Optional[str] = None,
    metadata_path: Optional[str] = None,
) -> MedicalReport:
    """Load a report JSON file, optionally attaching generated category reports.

    This definition replaces the one-argument ``read_medical_report`` from an
    earlier cell. ``kb_path`` and ``metadata_path`` are optional so that the
    earlier one-argument call keeps working after this cell has run.

    Args:
        file_path: Path to the report JSON file.
        kb_path: Directory holding the knowledge-base documents. When given
            together with ``metadata_path``, one report per distinct
            ``resources_table`` category is generated (one LLM call each) and
            stored under ``category_reports``, replacing any existing value.
        metadata_path: Path to the knowledge-base metadata JSON file.

    Returns:
        The validated ``MedicalReport``.

    Raises:
        ValueError: If only one of ``kb_path`` / ``metadata_path`` is given.
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If the report file does not contain valid JSON.
        KeyError: If generation is requested and the JSON has no
            ``resources_table`` key.
        pydantic.ValidationError: If the data does not satisfy the schema.
    """
    if (kb_path is None) != (metadata_path is None):
        raise ValueError("kb_path and metadata_path must be provided together")

    # Decode explicitly as UTF-8 rather than the locale default.
    with open(file_path, 'r', encoding='utf-8') as f:
        report_data = json.load(f)

    if kb_path is not None:
        # First-seen order (dict.fromkeys) keeps the generated sections in a
        # reproducible order; a set() would shuffle them between sessions.
        unique_categories = list(
            dict.fromkeys(resource['category'] for resource in report_data['resources_table'])
        )

        reports = []
        for category in tqdm(unique_categories):
            content = get_category_content(category, kb_path, metadata_path)
            report = generate_category_report(content, category)
            # Validate the parsed LLM output before it is merged into the report.
            reports.append(CategoryReportItem(**report))

        report_data['category_reports'] = [report.model_dump() for report in reports]

    return MedicalReport(**report_data)


# Re-load the report, this time generating the per-category reports (one LLM call per
# category), and rebind the `data` global from the earlier cell to the enriched report.
data = read_medical_report(
    '/content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/kushagra_mandwal.json',
    kb_path,
    metadata_path
)

  0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Persist the enriched report next to the input file as indented JSON.
with open('/content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/kushagra_mandwal_with_reports.json', 'w') as f:
    json.dump(data.model_dump(), f, indent=2)

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
import os


def generate_medical_report_md(report: MedicalReport) -> str:
    """Have the chat model render a full markdown document for a report.

    Sets ``OPENAI_API_KEY`` in ``os.environ`` from the Colab secret store,
    serialises every report section to JSON, pre-renders the category reports
    as markdown, and asks ``gpt-4o`` to assemble the final document.

    Args:
        report: Validated report, including its ``category_reports``.

    Returns:
        The ``content`` of the model's reply.
    """
    os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
    llm = ChatOpenAI(model="gpt-4o")

    # Pre-render each category report: heading, body, bullet list of sources, rule.
    rendered_sections = []
    for entry in report.category_reports:
        heading = entry.category.replace('_', ' ').title()
        source_bullets = "".join(f"- {url}\n" for url in entry.sources)
        rendered_sections.append(
            f"\n### {heading}\n\n{entry.text}\n\n**Sources:**\n{source_bullets}\n---\n"
        )
    category_reports_text = "".join(rendered_sections)

    # Serialise the structured sections once, then drop them into the prompt.
    patient_json = report.patient.model_dump_json()
    labs_json = json.dumps([lab.model_dump() for lab in report.labs])
    cvd_json = report.cvd_summary.model_dump_json()
    assessment_json = report.assessment.model_dump_json()
    plan_json = json.dumps([step.model_dump() for step in report.plan])
    red_flags_json = json.dumps([flag.model_dump() for flag in report.red_flags])
    resources_json = json.dumps([res.model_dump() for res in report.resources_table])

    system_prompt = """You are a medical report formatter.
    Create a comprehensive markdown medical report with appropriate emojis for better readability.
    Use emojis for section headers and important items.
    DO NOT change or paraphrase the category reports section - keep it exactly as provided.
    Format all data in well-structured markdown with tables where appropriate."""

    user_prompt = f"""Create a complete markdown medical report from this data.

    Patient: {patient_json}
    Labs: {labs_json}
    CVD Summary: {cvd_json}
    Assessment: {assessment_json}
    Plan: {plan_json}
    Red Flags: {red_flags_json}
    Resources: {resources_json}

    IMPORTANT: For the "Detailed Health Information Guides" section, use EXACTLY this text without any changes:
    {category_reports_text}

    Disclaimer: {report.disclaimer}

    Add appropriate emojis throughout the document for better visual appeal.
    Keep all category reports text EXACTLY as provided above."""

    reply = llm.invoke([
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_prompt),
    ])
    return reply.content


def save_medical_report_md(report: MedicalReport, output_path: str):
    """Generate the markdown report and write it to ``output_path``.

    Args:
        report: Report passed through to ``generate_medical_report_md``.
        output_path: Destination file; overwritten if it exists.

    Returns:
        The markdown text that was written.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    md_content = generate_medical_report_md(report)
    # The generated markdown contains emoji, so write it explicitly as UTF-8;
    # a non-UTF-8 locale default would raise UnicodeEncodeError here.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(md_content)
    return md_content



In [None]:
# Render the enriched report to markdown and save it next to the JSON files.
output_path = "/content/drive/MyDrive/MyfastHealth/projects/AI/user_report_generator/kushagra_mandwal_with_reports.md"
# NOTE(review): save_medical_report_md returns the markdown, so as the last expression of
# the cell the whole report (patient details and lab values included) is echoed into the
# saved notebook output. Consider assigning the result or ending the line with `;` before sharing.
save_medical_report_md(data, output_path)

'# 🏥 Medical Report\n\n---\n\n## 🗂️ Patient Details\n\n- **Name:** Kushagra Mandwal\n- **Age:** 33\n- **Sex:** Male\n\n---\n\n## 🧪 Laboratory Results\n\n### Biochemistry\n\n| Test Name          | Value | Unit    | Reference Range | Flag   |\n|--------------------|-------|---------|-----------------|--------|\n| Sodium             | 139   | mmol/L  | 135-145         | Normal |\n| Potassium          | 5.2   | mmol/L  | 3.5-5.5         | Normal |\n| Chloride           | 103   | mmol/L  | 95-110          | Normal |\n| Bicarbonate        | 27    | mmol/L  | 20-32           | Normal |\n| Urea               | 3.0   | mmol/L  | 3.0-8.0         | Normal |\n| Creatinine         | 81    | umol/L  | 60-110          | Normal |\n| eGFR               | >90   | ml/min  | >59             | Normal |\n| Uric Acid          | 0.40  | mmol/L  | 0.20-0.50       | Normal |\n| Calcium            | 2.51  | mmol/L  | 2.10-2.60       | Normal |\n| Corrected Calcium  | 2.33  | mmol/L  | 2.10-2.60       | Normal |\