Milestone_3: NLP/AI Text Interpretation

In [9]:
# install required packages (skip apt-get for Windows)
!pip install PyPDF2 pdf2image pdfplumber pytesseract pillow tensorflow --upgrade

# imports
import os, re, json, numpy as np
from pathlib import Path
import PyPDF2
from pdf2image import convert_from_path
import pytesseract
from tensorflow.keras.models import load_model

# define paths (every location is built from the project root below)
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
base_dir = Path(r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE")
raw_reports = base_dir.joinpath("Data", "Raw_data", "Reports")
x_path = base_dir.joinpath("Data", "Transformed_data", "X_timeseries.npy")
model_path = base_dir.joinpath("Data", "Transformed_data", "icu_outcome_lstm.h5")
out_dir = base_dir.joinpath("Data", "final_output")

# load the data; axis 1 is kept as Time_steps and axis 2 as num_features
x_train = np.load(x_path)
Time_steps, num_features = x_train.shape[1], x_train.shape[2]

# statistics reduced over axes 0 and 1, i.e. one value per entry of the last axis
feature_mean = np.mean(x_train, axis=(0, 1))
feature_std = np.std(x_train, axis=(0, 1))

# load the trained model and print its summary
model = load_model(model_path)
model.summary()




  if not hasattr(np, "object"):


In [10]:
# extract the text from the pdf
def extract_text_from_pdf(pdf_path):
    """Return the embedded text of every page of a PDF, one page per line block.

    Parameters
    ----------
    pdf_path : str or path-like
        Location of the PDF file; it is opened in binary mode.

    Returns
    -------
    str
        The text of each page that yields any text, each followed by a
        newline.  Pages whose extraction result is empty or ``None`` are
        skipped, so a PDF without a text layer produces ``""``.
    """
    page_texts = []
    with open(pdf_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            # Extract once and reuse the result: the original called
            # extract_text() twice per page (test + append), doing the
            # page parsing work twice.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text + "\n")
    return "".join(page_texts)

def is_scanned_pdf(pdf_path):
    """Tell whether a PDF has no extractable text on any page.

    Returns ``False`` as soon as one page yields non-empty text and ``True``
    when none does (including a document with no pages).
    """
    with open(pdf_path, "rb") as f:
        pages = PyPDF2.PdfReader(f).pages
        # any() stops at the first page that has text, like the early return did.
        return not any(page.extract_text() for page in pages)

def ocr_pdf(pdf_path):
    """OCR every page image of a PDF and join the page texts with a space.

    Pages are rendered with ``convert_from_path`` and each image is passed to
    ``pytesseract.image_to_string`` with the ``--psm 6`` configuration.
    """
    page_texts = []
    for page_image in convert_from_path(pdf_path):
        page_texts.append(pytesseract.image_to_string(page_image, config="--psm 6"))
    return " ".join(page_texts)

# clean the text
def clean_text(text):
    """Normalise whitespace: form feeds become spaces, runs collapse to one space, ends are trimmed."""
    without_form_feeds = text.replace("\x0c", " ")
    return re.sub(r"\s+", " ", without_form_feeds).strip()

In [11]:
# extract the vitals and necessary information
def extract_float(text, pattern):
    """Return the first capture group of ``pattern`` in ``text`` as a float.

    The search is case-insensitive.  The value patterns used in this notebook
    capture ``[0-9.]+``, which also accepts tokens that are not valid numbers
    (``"98.6."`` when a value ends a sentence, ``"..."``, ``"1.2.3"``).  Those
    used to raise ``ValueError``; now the leading well-formed number is used
    when there is one.

    Parameters
    ----------
    text : str
        Text to search.
    pattern : str
        Regular expression whose group 1 captures the number.

    Returns
    -------
    float or None
        The parsed value, or ``None`` when the pattern does not match or
        group 1 holds no number.
    """
    m = re.search(pattern, text, re.I)
    if m is None:
        return None

    token = m.group(1)
    try:
        # Fast path, identical to the original behaviour for valid numbers.
        return float(token)
    except (TypeError, ValueError):
        pass

    # Salvage the leading number from tokens such as "98.6." or "1.2.3";
    # tokens with no leading digits (".", "...", a label) give None.
    leading = re.match(r"\d+(?:\.\d+)?", token or "")
    return float(leading.group(0)) if leading else None

def extract_int(text, pattern):
    """Return group 1 of the first case-insensitive match of ``pattern`` as an int, or None if there is no match."""
    found = re.search(pattern, text, re.I)
    if found is None:
        return None
    return int(found.group(1))

def extract_age(text):
    """Return the age stated after an ``age`` label, or None.

    Only a two- or three-digit number is recognised, and only values from
    18 to 100 inclusive are accepted; anything else yields ``None``.
    """
    found = re.search(r"\bage\s*[:\-]?\s*(\d{2,3})\b", text, re.I)
    if found is None:
        return None

    years = int(found.group(1))
    if years < 18 or years > 100:
        return None
    return years

def extract_features(text):
    """Pull vitals, labs, intervention flags and demographics out of report text.

    Numeric fields are located with case-insensitive ``label [: or -] value``
    patterns through ``extract_int`` / ``extract_float``; a field that is not
    found is stored as ``None``.  Intervention fields are 0/1 flags produced
    by a plain substring test on the lower-cased text.

    Parameters
    ----------
    text : str
        Report text (already cleaned by the caller).

    Returns
    -------
    dict
        One entry per key assigned below.  ``gender`` is 1 for male, 0 for
        female and ``None`` when the report does not state it (previously a
        missing gender was indistinguishable from female).
    """
    f = {}

    # VITALS
    # The short abbreviations hr, rr, map and bun are anchored with \b, as the
    # ph and age patterns already are, so they cannot match the tail of a
    # longer token ("24hr 2" is not a heart rate, "roadmap 7" is not a MAP).
    f["heart_rate"] = extract_int(text, r"(?:heart rate|\bhr|pulse)\s*[:\-]?\s*(\d+)")
    f["respiratory_rate"] = extract_int(text, r"(?:respiratory rate|\brr)\s*[:\-]?\s*(\d+)")
    f["spo2"] = extract_int(text, r"(?:spo2|o2 saturation)\s*[:\-]?\s*(\d+)")
    f["temperature_fahrenheit"] = extract_float(text, r"(?:temperature|temp)\s*[:\-]?\s*([0-9.]+)")
    f["mean_bp"] = extract_int(text, r"(?:mean bp|blood pressure mean|\bmap)\s*[:\-]?\s*(\d+)")

    # LABS
    f["glucose"] = extract_float(text, r"(?:glucose|blood sugar)\s*[:\-]?\s*([0-9.]+)")
    f["creatinine"] = extract_float(text, r"creatinine\s*[:\-]?\s*([0-9.]+)")
    f["urea"] = extract_float(text, r"(?:urea|\bbun)\s*[:\-]?\s*([0-9.]+)")
    f["sodium"] = extract_float(text, r"sodium\s*[:\-]?\s*([0-9.]+)")
    f["potassium"] = extract_float(text, r"potassium\s*[:\-]?\s*([0-9.]+)")
    f["hemoglobin"] = extract_float(text, r"(?:hemoglobin|hgb)\s*[:\-]?\s*([0-9.]+)")
    f["wbc"] = extract_float(text, r"wbc\s*[:\-]?\s*([0-9.]+)")
    f["lactate"] = extract_float(text, r"lactate\s*[:\-]?\s*([0-9.]+)")
    f["ph"] = extract_float(text, r"\bph\s*[:\-]?\s*([0-9.]+)")
    f["cholesterol"] = extract_float(text, r"cholesterol\s*[:\-]?\s*([0-9.]+)")

    # INTERVENTIONS (1 when the keyword occurs anywhere in the text)
    t = text.lower()
    f["fluid_balance"] = int("fluid" in t)
    f["vasopressors"] = int("vasopressor" in t)
    f["sedatives"] = int("sedative" in t)
    f["antibiotics"] = int("antibiotic" in t)
    f["insulin"] = int("insulin" in t)

    # DEMOGRAPHICS
    f["age"] = extract_age(text)

    gender = re.search(r"\b(sex|gender)\s*[:\-]?\s*(male|female)\b", text, re.I)
    if gender is None:
        # Unknown stays unknown instead of silently becoming "female".
        f["gender"] = None
    else:
        f["gender"] = 1 if gender.group(2).lower() == "male" else 0

    return f

     

In [12]:
# defining features orders from the trained model
# build_model_input walks this list in order and looks each name up in the
# dict returned by extract_features, so the position of a name here is the
# position of that value in the feature vector.
# NOTE(review): presumably this mirrors the last-axis layout of X_timeseries.npy
# (feature_mean / feature_std are applied position by position) - TODO confirm.
FEATURE_ORDER = [
    "heart_rate",
    "respiratory_rate",
    "spo2",
    "temperature_fahrenheit",
    "mean_bp",
    "glucose",
    "creatinine",
    "urea",
    "sodium",
    "potassium",
    "hemoglobin",
    "wbc",
    "lactate",
    "ph",
    "cholesterol",
    "fluid_balance",
    "vasopressors",
    "sedatives",
    "antibiotics",
    "insulin",
    "age",
    "gender"
]

In [13]:
# build the normalized model input from the extracted features
def build_model_input(features_dict):
    """Turn an extracted-feature dict into a normalized model input tensor.

    Values are read in ``FEATURE_ORDER``; anything missing, ``None``, ``""``,
    ``"NA"`` or not convertible to float becomes 0.0.  The vector is then
    standardised with ``feature_mean`` / ``feature_std`` and repeated for
    every time step.

    Returns an array of shape ``(1, Time_steps, num_features)``.
    """

    def as_number(raw):
        # missing / invalid values collapse to 0.0
        if raw is None or raw == "" or raw == "NA":
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    raw_vector = np.array(
        [as_number(features_dict.get(name, 0.0)) for name in FEATURE_ORDER],
        dtype=np.float32,
    )

    # NOTE(review): a missing value enters as raw 0 *before* scaling, so it ends
    # up at -mean/std rather than at the feature mean - confirm this matches how
    # gaps were filled in the training data.
    # the small epsilon keeps the division safe when a feature has zero spread
    scaled = (raw_vector - feature_mean) / (feature_std + 1e-8)

    # same vector at every time step
    window = np.tile(scaled, (Time_steps, 1))
    return window.reshape(1, Time_steps, num_features)


In [14]:
# calculate the mortality risk
# Maps report file stem -> {"mortality_risk", "risk_category", "features_used"}.
clinical_output = {}

# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for file in sorted(os.listdir(raw_reports)):
    path = raw_reports / file

    # Case-insensitive check so files such as "REPORT.PDF" are not silently skipped.
    if not file.lower().endswith(".pdf"):
        continue

    # Scanned documents have no text layer, so they go through OCR instead.
    raw = ocr_pdf(path) if is_scanned_pdf(path) else extract_text_from_pdf(path)
    text = clean_text(raw)

    features = extract_features(text)
    x_input = build_model_input(features)

    # First element of the first prediction row is used as the risk score.
    risk = float(model.predict(x_input, verbose=0)[0][0])

    clinical_output[path.stem] = {
        "mortality_risk": round(risk, 4),
        "risk_category": "Low" if risk < 0.3 else "Moderate" if risk < 0.6 else "High",
        "features_used": features
    }


In [15]:
# INSTALL DEPENDENCIES
!pip install pdfplumber PyPDF2 pdf2image pytesseract pillow tensorflow numpy
!apt-get update
!apt-get install -y poppler-utils tesseract-ocr


# IMPORTS
import re
import json
import numpy as np
import tensorflow as tf
import pdfplumber
from pdf2image import convert_from_path
import pytesseract

# PATHS
# NOTE(review): machine-specific absolute paths. model_path and x_path rebind the
# names set in the first cell, now as plain strings.
pdf_path = r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Raw_data\Reports\REPORT.pdf"
model_path = r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Transformed_data\icu_outcome_lstm.h5"
x_path = r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Transformed_data\X_timeseries.npy"
output_JSON  = "clinical_output.json"  # relative path: written to the current working directory

# FEATURE ORDER
# build_model_input(extracted, x_reference) looks each of these names up in the
# extracted-feature dict, in this order.
# NOTE(review): the extract_features defined for this pipeline never sets
# "Respiratory Rate", "O2 saturation pulseoxymetry", "Fluid_Balance",
# "Vasopressors", "Sedatives", "Antibiotics" or "Insulin", so those positions
# always reach the model as 0.
FEATURE_NAMES = [
    "Heart Rate", "Respiratory Rate", "O2 saturation pulseoxymetry",
    "Temperature Fahrenheit", "Non Invasive Blood Pressure mean",
    "Glucose", "Creatinine", "Urea (BUN)", "Sodium", "Potassium",
    "Hemoglobin", "WBC", "Lactate", "pH", "Cholesterol",
    "Fluid_Balance", "Vasopressors", "Sedatives", "Antibiotics",
    "Insulin", "Age", "Gender"
]



'apt-get' is not recognized as an internal or external command,
operable program or batch file.
'apt-get' is not recognized as an internal or external command,
operable program or batch file.


In [16]:
# pdf text extraction
def extract_text_from_pdf(pdf_path):
    """Return the embedded text of every page of a PDF.

    Parameters
    ----------
    pdf_path : str or path-like
        Location of the PDF file; it is opened in binary mode.

    Returns
    -------
    str
        The text of each page that yields any text, each followed by a
        newline; ``""`` when no page has extractable text.
    """
    page_texts = []
    with open(pdf_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            # Extract once per page; the original extracted every page twice
            # (once for the truth test, once for the concatenation).
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text + "\n")
    return "".join(page_texts)

# clean the text
def clean_text(text):
    """Replace form feeds with spaces, squeeze whitespace runs to one space and trim both ends."""
    without_form_feeds = text.replace("\x0c", " ")
    return re.sub(r"\s+", " ", without_form_feeds).strip()

# safe float conversion
def safe_float(val):
    """Convert ``val`` to float, falling back to 0.0 when it cannot be converted."""
    try:
        result = float(val)
    except (TypeError, ValueError):
        result = 0.0
    return result

In [17]:
# dynamic feature extraction
def extract_features(text):
    """Extract demographics, vitals and lab values from report text.

    Each measurement has a list of label aliases that are tried in order
    through ``extract_float``; the first alias that yields a value wins and a
    measurement with no hit is stored as ``None``.

    Changes from the previous version:

    * the urea pattern used a capturing group for the label
      (``(Urea|BUN)``), so the label rather than the number was handed to
      ``float``; the group is now non-capturing;
    * blood pressure prefers an explicit "Mean BP" value, and a
      "systolic/diastolic" reading is converted to a mean arterial pressure
      instead of storing the systolic number as the mean;
    * every label is anchored with ``\\b`` so abbreviations such as HR, PR or
      BP cannot match the tail of a longer token (e.g. "24hr: 0").

    NOTE(review): "Respiratory Rate", "O2 saturation pulseoxymetry" and the
    intervention flags are not extracted here.

    Parameters
    ----------
    text : str
        Report text (already cleaned by the caller).

    Returns
    -------
    dict
        Keys "Gender" (1 male / 0 female / None), "Age", "Heart Rate",
        "Temperature Fahrenheit", "Non Invasive Blood Pressure mean" and one
        key per lab listed in ``lab_patterns``.
    """
    features = {}

    def first_value(patterns):
        """Return the first non-None extract_float result over ``patterns``, else None."""
        for p in patterns:
            val = extract_float(text, p)
            if val is not None:
                return val
        return None

    # ---- AGE & GENDER ----  (only the "Male / 41 Y" header form is recognised)
    ag = re.search(r"(Male|Female)\s*/\s*(\d+)\s*Y", text, re.I)
    if ag:
        gender_str = ag.group(1)
        features["Gender"] = 1 if gender_str.lower() == "male" else 0
        features["Age"] = int(ag.group(2))
    else:
        features["Gender"] = None
        features["Age"] = None

    # ---- HEART RATE ----
    features["Heart Rate"] = first_value([
        r"\bHeart Rate[:\s]+([\d.]+)",
        r"\bHR[:\s]+([\d.]+)",
        r"\bPulse[:\s]+([\d.]+)",
        r"\bPulse Rate[:\s]+([\d.]+)",
        r"\bPR[:\s]+([\d.]+)",
    ])

    # ---- TEMPERATURE ----
    features["Temperature Fahrenheit"] = first_value([
        r"\bTemperature[:\s]+([\d.]+)",
        r"\bTemp[:\s]+([\d.]+)",
    ])

    # ---- BLOOD PRESSURE ----
    # 1) an explicitly reported mean wins;
    mean_bp = first_value([r"\bMean BP[:\s]+([\d.]+)"])
    if mean_bp is None:
        # 2) a "systolic/diastolic" reading is converted with the usual
        #    approximation MAP = (SBP + 2 * DBP) / 3;
        reading = re.search(
            r"\b(?:BP|Blood Pressure)[:\s]+(\d+(?:\.\d+)?)\s*/\s*(\d+(?:\.\d+)?)",
            text,
            re.I,
        )
        if reading:
            systolic = float(reading.group(1))
            diastolic = float(reading.group(2))
            mean_bp = round((systolic + 2 * diastolic) / 3, 1)
    if mean_bp is None:
        # 3) otherwise fall back to a single number after the label.
        mean_bp = first_value([
            r"\bBP[:\s]+([\d.]+)",
            r"\bBlood Pressure[:\s]+([\d.]+)",
        ])
    features["Non Invasive Blood Pressure mean"] = mean_bp

    # ---- LABS (aliases tried in order) ----
    lab_patterns = {
        "Glucose": [
            r"\bGlucose[:\s]+([\d.]+)",
            r"\bBlood Sugar[:\s]+([\d.]+)",
            r"\bFasting Blood Sugar[:\s]+([\d.]+)",
            r"\bFBS[:\s]+([\d.]+)",
        ],
        "Creatinine": [r"\bCreatinine[:\s]+([\d.]+)"],
        # non-capturing label group: group 1 must be the number
        "Urea (BUN)": [r"\b(?:Urea|BUN)[:\s]+([\d.]+)"],
        "Sodium": [r"\bSodium[:\s]+([\d.]+)"],
        "Potassium": [r"\bPotassium[:\s]+([\d.]+)"],
        "Hemoglobin": [r"\bHemoglobin[:\s]+([\d.]+)"],
        "WBC": [
            r"\bWBC[:\s]+([\d.]+)",
            r"\bWhite Blood Cells[:\s]+([\d.]+)",
            r"\bWBC count[:\s]+([\d.]+)",
        ],
        "Lactate": [r"\bLactate[:\s]+([\d.]+)"],
        "pH": [r"\bpH[:\s]+([\d.]+)"],
        "Cholesterol": [r"\bCholesterol[:\s]+([\d.]+)"],
    }

    for lab, patterns in lab_patterns.items():
        features[lab] = first_value(patterns)

    return features


In [18]:
# build model input
def build_model_input(extracted, x_reference=None):
    """Build the model input tensor from an extracted-feature dict.

    Values are read in ``FEATURE_NAMES`` order; a missing or ``None`` value is
    treated as NaN and then replaced by 0.0 for the model.  The resulting
    vector is repeated for every time step.

    Parameters
    ----------
    extracted : dict
        Feature name -> numeric value (or ``None``).
    x_reference : array-like, optional
        Reference data; the length of its second axis is used as the number
        of time steps.  Previously this argument was ignored and 24 was
        hard-coded; 24 is still used when no usable reference is given.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(1, time_steps, len(FEATURE_NAMES))``.

    NOTE(review): no mean/std scaling is applied here, unlike the batch
    pipeline earlier in the notebook - confirm which form the model expects.
    """
    default_time_steps = 24

    feature_vector = np.zeros(len(FEATURE_NAMES), dtype=np.float32)
    for i, fname in enumerate(FEATURE_NAMES):
        val = extracted.get(fname)
        feature_vector[i] = np.nan if val is None else float(val)

    # NaN -> 0 ONLY for the model; the caller's dict keeps its None values.
    feature_vector_model = np.nan_to_num(feature_vector, nan=0.0)

    # Take the window length from the reference data when it has a time axis.
    ref_shape = getattr(x_reference, "shape", None)
    if ref_shape is not None and len(ref_shape) >= 2:
        time_steps = int(ref_shape[1])
    else:
        time_steps = default_time_steps

    x_input = np.tile(feature_vector_model, (time_steps, 1))
    return np.expand_dims(x_input, axis=0)

In [19]:
# clinical interpretation
def clinical_interpretation(risk, vitals):
    """Translate a risk score and lab values into conditions and diet advice.

    Parameters
    ----------
    risk : float
        Model risk score; below 0.3 is low, below 0.6 moderate, otherwise high.
    vitals : dict
        Patient metrics.  Only "glucose", "creatinine", "hemoglobin" and
        "lactate" are read; a key that is absent, ``None`` or 0 is treated as
        "not measured" (absent keys used to raise ``KeyError``).

    Returns
    -------
    tuple
        ``(conditions, recommendations, avoid, summary)``: three lists of
        unique strings in the order they were triggered (risk level first)
        and a one-sentence summary.  The lists were previously passed through
        ``set``, which made their order arbitrary.
    """
    conditions = []
    recommendations = []
    avoid = []

    def measured(name):
        """Return the metric when present and non-zero, else None."""
        value = vitals.get(name)
        return value if value else None

    def unique(items):
        """Drop duplicates while keeping first-seen order."""
        return list(dict.fromkeys(items))

    if risk < 0.3:
        conditions.append("Low Clinical Risk")
    elif risk < 0.6:
        conditions.append("Moderate Clinical Risk")
    else:
        conditions.append("High Clinical Risk")

    glucose = measured("glucose")
    if glucose is not None and glucose >= 140:
        conditions.append("Hyperglycemia / Diabetes Risk")
        recommendations += ["Low-GI foods", "High-fiber diet"]
        avoid += ["Sugary foods", "Sweetened beverages"]

    creatinine = measured("creatinine")
    if creatinine is not None and creatinine >= 1.3:
        conditions.append("Possible Renal Impairment")
        recommendations += ["Renal-friendly diet"]
        avoid += ["High sodium foods"]

    hemoglobin = measured("hemoglobin")
    if hemoglobin is not None and hemoglobin < 10:
        conditions.append("Anemia")
        recommendations += ["Iron-rich foods"]

    lactate = measured("lactate")
    if lactate is not None and lactate >= 2.0:
        conditions.append("Possible Tissue Hypoperfusion")

    summary = (
        "Patient is clinically stable."
        if risk < 0.3 else
        "Patient requires monitoring and dietary management."
        if risk < 0.6 else
        "Patient requires urgent clinical and nutritional intervention."
    )

    return unique(conditions), unique(recommendations), unique(avoid), summary




In [20]:
def extract_pdf_text(pdf_path):
    """Return the text of a PDF, using OCR when ``is_scanned_pdf`` says it has no text layer."""
    reader = ocr_pdf if is_scanned_pdf(pdf_path) else extract_text_from_pdf
    return reader(pdf_path)


In [21]:
# main pipeline

def main():
    """Run the single-report pipeline and write the result as JSON.

    Steps: load the model and the reference array, read and clean the PDF
    text, extract features, build the model input, predict the risk score,
    derive conditions and diet advice, then save the combined result to
    ``output_JSON`` and print it.

    The "gender" entry of the output is "Male", "Female", or ``None`` when the
    report did not state it; previously an unknown gender was reported as
    "Female".
    """
    # Load model and reference data
    model = tf.keras.models.load_model(model_path, compile=False)
    x_ref = np.load(x_path)

    # Extract and clean the PDF text
    raw_text = extract_pdf_text(pdf_path)
    text = clean_text(raw_text)

    # Extract features and build the model input
    features = extract_features(text)
    x_input = build_model_input(features, x_ref)

    # Predict the risk score (first element of the first prediction row)
    mortality_risk = float(model.predict(x_input)[0][0])

    # Keep "unknown" distinct from "Female".
    gender_code = features.get("Gender")
    if gender_code is None:
        gender_label = None
    else:
        gender_label = "Male" if gender_code == 1 else "Female"

    # Patient metrics; measurements that were not found stay None.
    patient_metrics = {
        "mortality_risk": mortality_risk,
        "heart_rate": features.get("Heart Rate"),
        "temperature_f": features.get("Temperature Fahrenheit"),
        "blood_pressure_mean": features.get("Non Invasive Blood Pressure mean"),
        "glucose": features.get("Glucose"),
        "creatinine": features.get("Creatinine"),
        "urea": features.get("Urea (BUN)"),
        "sodium": features.get("Sodium"),
        "potassium": features.get("Potassium"),
        "hemoglobin": features.get("Hemoglobin"),
        "wbc": features.get("WBC"),
        "lactate": features.get("Lactate"),
        "ph": features.get("pH"),
        "cholesterol": features.get("Cholesterol"),
        "age": features.get("Age"),
        "gender": gender_label,
    }

    # Clinical interpretation
    conditions, recommendations, avoid, summary = clinical_interpretation(
        mortality_risk, patient_metrics
    )

    # final json
    clinical_output = {
        "patient_metrics": patient_metrics,
        "conditions": conditions,
        "recommendations": recommendations,
        "avoid": avoid,
        "summary": summary
    }

    # Save to JSON
    with open(output_JSON, "w") as f:
        json.dump(clinical_output, f, indent=4)

    print(json.dumps(clinical_output, indent=4))
# run
if __name__ == "__main__":
    main()

     


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step
{
    "patient_metrics": {
        "mortality_risk": 0.4801770746707916,
        "heart_rate": 0.0,
        "temperature_f": null,
        "blood_pressure_mean": null,
        "glucose": null,
        "creatinine": null,
        "urea": null,
        "sodium": null,
        "potassium": null,
        "hemoglobin": null,
        "wbc": null,
        "lactate": null,
        "ph": 4.6,
        "cholesterol": null,
        "age": 41,
        "gender": "Male"
    },
    "conditions": [
        "Moderate Clinical Risk"
    ],
    "recommendations": [],
    "avoid": [],
    "summary": "Patient requires monitoring and dietary management."
}


In [22]:
# building knowledge base
import pandas as pd
import json

ifct = pd.read_csv(r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Raw_data\ifct2017_compositions.csv")
indian_foods = pd.read_csv(r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Raw_data\indian_food.csv")

# outer-join the two tables on the food name, then put 0 in every empty cell
merged_foods = indian_foods.merge(ifct, on='name', how="outer")
merged_foods = merged_foods.fillna(0)

# one dict per row, written out as the JSON knowledge base
diet_kb = merged_foods.to_dict(orient="records")
with open(r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Transformed_data\diet_kb.json", "w") as f:
    json.dump(diet_kb, f, indent=4)



In [23]:
import json

# read back the clinical result written by the pipeline and show it
with open("clinical_output.json", "r") as f:
    clinical_data = json.loads(f.read())

print(json.dumps(clinical_data, indent=2))


{
  "patient_metrics": {
    "mortality_risk": 0.4801770746707916,
    "heart_rate": 0.0,
    "temperature_f": null,
    "blood_pressure_mean": null,
    "glucose": null,
    "creatinine": null,
    "urea": null,
    "sodium": null,
    "potassium": null,
    "hemoglobin": null,
    "wbc": null,
    "lactate": null,
    "ph": 4.6,
    "cholesterol": null,
    "age": 41,
    "gender": "Male"
  },
  "conditions": [
    "Moderate Clinical Risk"
  ],
  "recommendations": [],
  "avoid": [],
  "summary": "Patient requires monitoring and dietary management."
}


In [24]:
# load diet kb
with open(r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE\Data\Transformed_data\diet_kb.json", "r") as f:
    diet_kb = json.loads(f.read())

In [25]:
!pip install google-generativeai

import json
import os
import google.generativeai as genai



  from .autonotebook import tqdm as notebook_tqdm

All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  import google.generativeai as genai


In [26]:

! pip install google-genai



In [4]:
# NOTE(review): this variable is never read - the client set-up cell takes the key
# from os.environ["GEMINI_API_KEY"]. Set that environment variable rather than
# pasting a real key here, so the secret is not saved with the notebook.
GEMINI_API_KEY = ""  # placeholder only; see note above
# NOTE(review): this upgrades the legacy google-generativeai package, while the
# client code imports `from google import genai` (the google-genai package).
! pip install --upgrade google-generativeai




In [7]:
import os
import json
from google import genai

# defining paths
from pathlib import Path
BASE_DIR = Path(r"C:\Users\mouni\OneDrive\Desktop\AINUTRICARE")
diet_kb_path  = BASE_DIR / "Data" / "Transformed_data" / "diet_kb.json"

# The API key comes from the environment so it is never stored in the notebook.
# Fail early with an actionable message instead of a bare KeyError (unset) or an
# authentication failure later on (set but empty).
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Define it as an environment variable "
        "(key from AI Studio) before starting the notebook kernel."
    )
client = genai.Client(api_key=_gemini_api_key)

# Load clinical & diet KB
with open("clinical_output.json", "r") as f:
    clinical_data = json.load(f)

with open(diet_kb_path, "r", encoding="utf-8") as f:
    diet_kb = json.load(f)


#  prompt
# Single-day prompt sent to Gemini. It is an f-string, so:
#   - clinical_data is embedded in full as pretty-printed JSON;
#   - only the first 50 records of diet_kb are embedded (diet_kb[:50]);
#   - doubled braces {{ }} render as literal braces of the JSON template.
# NOTE(review): the examples ("Grilled chicken salad", "Oats with skim milk")
# sit next to the rule "Use ONLY Indian food items" - confirm that is intended.
prompt_text = f"""
You are a clinical dietitian AI.

IMPORTANT RULES:
- The patient is an ADULT unless explicitly stated otherwise.
- Do NOT assume pediatric or toddler age.
- Use ONLY Indian food items or Indian-style meals.
- Meals must be realistic (e.g., "Oats with skim milk", "Grilled chicken salad").

Patient clinical metrics:
{json.dumps(clinical_data, indent=2)}

Diet knowledge base:
{json.dumps(diet_kb[:50], indent=2)}

Generate a ONE-DAY diet plan in EXACT JSON FORMAT below.
Do NOT add text outside JSON.

{{
  "day_plan": {{
    "breakfast": [
      {{
        "item": "",
        "calories": 0.0,
        "protein": 0.0,
        "fat": 0.0,
        "carbs": 0.0,
        "tags": []
      }}
    ],
    "lunch": [],
    "dinner": [],
    "snacks": []
  }},
  "total_nutrition": {{
    "calories": 0.0,
    "protein": 0.0,
    "fat": 0.0,
    "carbs": 0.0
  }},
  "medical_reasoning": ""
}}

STRICT TAGGING RULES (MANDATORY):
- If creatinine > 2.0 → EVERY meal item MUST include "renal_safe"
- If glucose > 180 → EVERY meal item MUST include "diabetic_friendly" AND "low_sugar"
- If sodium > 145 → EVERY meal item MUST include "heart_healthy"
- If cholesterol > 240 → AVOID fried foods AND include "low_fat"
- Tags must be added EVEN IF the food is naturally safe

MEAL STYLE REQUIREMENT:
- Meals must be described in natural form:
  Example:
  "Oats with skim milk"
  "Grilled chicken salad with olive oil"
  "Vegetable soup with carrot and bottle gourd"
- Avoid listing raw ingredients alone.
- Follow Indian eating patterns.


"""

#  Calling  Gemini Model
import re  # used below to locate a fenced JSON block in the reply

response = client.models.generate_content(
    model="gemini-2.5-flash",   # or another available Gemini model
    contents=prompt_text        # pass prompt as a plain string
)


#  Extract text directly
diet_plan_text = response.text
print("Raw Gemini output:\n", diet_plan_text)

#  Parse JSON Safely
# The reply is normally wrapped in a ```json ... ``` fence and may carry extra
# prose. str.strip("```json") strips a *set of characters*, not a prefix, and
# cannot cope with surrounding prose, so the payload is located explicitly:
# prefer the fenced block, then keep everything between the outermost braces.
try:
    diet_plan_json = json.loads(diet_plan_text)
except json.JSONDecodeError:
    fenced = re.search(r"```(?:json)?\s*(.*?)```", diet_plan_text, re.S | re.I)
    cleaned = fenced.group(1) if fenced else diet_plan_text
    first_brace, last_brace = cleaned.find("{"), cleaned.rfind("}")
    if first_brace == -1 or last_brace < first_brace:
        raise ValueError("Gemini response does not contain a JSON object")
    cleaned = cleaned[first_brace:last_brace + 1]
    diet_plan_json = json.loads(cleaned)

#  Save Output
with open("daily_diet_plan.json", "w") as f:
    json.dump(diet_plan_json, f, indent=4)
print("Diet plan JSON saved as daily_diet_plan.json")

     

Raw Gemini output:
 ```json
{
  "day_plan": {
    "breakfast": [
      {
        "item": "Banana, ripe, robusta",
        "calories": 126.19,
        "protein": 1.48,
        "fat": 0.40,
        "carbs": 28.36,
        "tags": []
      },
      {
        "item": "Almonds (small portion)",
        "calories": 182.77,
        "protein": 5.52,
        "fat": 17.55,
        "carbs": 0.91,
        "tags": []
      }
    ],
    "lunch": [
      {
        "item": "Bajra Roti with Moong Dal",
        "calories": 458.40,
        "protein": 20.77,
        "fat": 5.09,
        "carbs": 79.42,
        "tags": []
      },
      {
        "item": "Amaranth leaves (green) stir-fry",
        "calories": 75.59,
        "protein": 3.29,
        "fat": 5.65,
        "carbs": 2.28,
        "tags": []
      }
    ],
    "dinner": [
      {
        "item": "Steamed Rice",
        "calories": 234.00,
        "protein": 4.86,
        "fat": 0.54,
        "carbs": 50.40,
        "tags": []
      },
      {
  

In [8]:
import json
import time
from collections import defaultdict
import json
from google import genai

NUM_DAYS = 7  # number of days for the weekly plan

# Reusing previous prompt_text
# The weekly instructions are appended to prompt_text, so the request still
# contains the single-day template and its "ONE-DAY" wording ahead of this text.
# {NUM_DAYS} is interpolated; doubled braces render as literal braces.

weekly_prompt = prompt_text + f"""

ADDITIONAL INSTRUCTIONS (DO NOT IGNORE):

- Generate diet plans for {NUM_DAYS} DAYS.
- Each day MUST have meal VARIATION.
- Structure output EXACTLY as below.
- Compute weekly nutrition summary by summing all days.
- Generate a grocery list aggregated from all meals.

FINAL OUTPUT JSON FORMAT (STRICT):

{{
  "days_generated": {NUM_DAYS},
  "diet_plans": {{
    "day_1": {{ ... SAME STRUCTURE AS SINGLE DAY ... }},
    "day_2": {{ ... }},
    "...": {{ }}
  }},
  "weekly_nutrition_summary": {{
    "average_per_day": {{
      "calories": 0.0,
      "protein": 0.0,
      "fat": 0.0,
      "carbs": 0.0
    }},
    "weekly_total": {{
      "calories": 0.0,
      "protein": 0.0,
      "fat": 0.0,
      "carbs": 0.0
    }}
  }},
  "grocery_list": [
    {{
      "item": "",
      "times_used_in_week": 0
    }}
  ]
}}

Rules:
- Maintain ALL medical tagging rules.
- No forbidden foods.
- Indian meals only.
- No text outside JSON.
"""
# Single Gemini Call (Quota Safe)
import re  # used below to locate a fenced JSON block in the reply

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=weekly_prompt
)

raw_output = response.text.strip()
print("Raw Weekly Output:\n", raw_output)

# Parse JSON Safely
# str.strip("```json") removes any of the characters `, j, s, o, n from both
# ends rather than a prefix/suffix, and fails when the reply carries prose
# around the fence. Locate the payload explicitly instead: prefer the fenced
# block, then keep everything between the outermost braces.
try:
    weekly_diet_json = json.loads(raw_output)
except json.JSONDecodeError:
    fenced = re.search(r"```(?:json)?\s*(.*?)```", raw_output, re.S | re.I)
    cleaned = fenced.group(1) if fenced else raw_output
    first_brace, last_brace = cleaned.find("{"), cleaned.rfind("}")
    if first_brace == -1 or last_brace < first_brace:
        raise ValueError("Gemini response does not contain a JSON object")
    cleaned = cleaned[first_brace:last_brace + 1]
    weekly_diet_json = json.loads(cleaned)

# save final weekly diet plan
with open("weekly_diet_plan.json", "w") as f:
    json.dump(weekly_diet_json, f, indent=4)

print(" Weekly diet plan saved as weekly_diet_plan.json")

     
     

Raw Weekly Output:
 ```json
{
  "days_generated": 7,
  "diet_plans": {
    "day_1": {
      "breakfast": [
        {
          "item": "Bajra Porridge (80g raw equivalent)",
          "calories": 286.0,
          "protein": 8.8,
          "fat": 4.3,
          "carbs": 49.4,
          "tags": ["healthy", "nutrient_dense", "alkaline_promoting", "vegetarian"]
        },
        {
          "item": "Banana (Robusta, 150g)",
          "calories": 157.7,
          "protein": 1.8,
          "fat": 0.5,
          "carbs": 35.4,
          "tags": ["healthy", "nutrient_dense", "alkaline_promoting", "vegetarian"]
        },
        {
          "item": "Almonds (30g)",
          "calories": 173.7,
          "protein": 5.5,
          "fat": 17.5,
          "carbs": 0.9,
          "tags": ["healthy", "nutrient_dense", "alkaline_promoting", "vegetarian"]
        }
      ],
      "lunch": [
        {
          "item": "Agathi Leaves Sabzi (200g cooked)",
          "calories": 590.0,
          "protei