In [1]:
import os
import json
import google.generativeai as genai


In [2]:
# --- CONFIGURATION ---

# Path to the Google credentials JSON file used by this notebook.
# A GOOGLE_APPLICATION_CREDENTIALS value that is already exported in the
# environment takes precedence, so the notebook can run on another machine
# without editing this cell; the original local path is kept as the fallback
# (also used when the variable is set but empty).
CREDENTIALS_FILE = (
    os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
    or '/Users/ashishrathore/Aarogya-AI/crack-decorator-468911-s1-5ab46e3aea4b.json'
)


In [3]:
# --- SETUP ---

# Publish the credentials path through GOOGLE_APPLICATION_CREDENTIALS, the
# environment variable used by Google Application Default Credentials.
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': CREDENTIALS_FILE})

# Configure the generative AI SDK to use its REST transport.
genai.configure(transport='rest')


In [4]:
# --- RAW TEXT (this came from our Station 1 - Google Vision) ---
# Sample lab-report text, hard-coded here as the input for the extraction
# step. It contains the lab header, the patient block (name, age, sex, UHID,
# collection details) and two result tables (LIPID PROFILE and HEMOGRAM).
# Note that the two tables lay out units differently: the lipid rows carry
# the unit in the reference column, the hemogram rows next to the result.
raw_text_from_report = """
DRLOGY PATHOLOGY LAB
Accurate Caring | Instant
0123456789 | 0912345678
drlogypathlab@drlogy.com
105-108, SMART VISION COMPLEX, HEALTHCARE ROAD, OPPOSITE HEALTHCARE COMPLEX. MUMBAI - 689578

Yashvi M. Patel
Age: 21 Years
Sex: Female
UHID: 556
Sample Collected At:
125, Shiv complex, S G Road, Mumbai
Sample Collected By: Mr Suresh
Ref. By: Dr. Hiren Shah

LIPID PROFILE
Investigation      Result    Reference Value
Total Cholesterol  150.0     <200 mg/dL
Triglycerides      100.0     <150 mg/dL
HDL Cholesterol    50.0      >40 mg/dL
LDL Cholesterol    80.0      <100 mg/dL
VLDL Cholesterol   20.0      <30 mg/dL

HEMOGRAM
Hemoglobin         14.5 g/dL  13.0-17.0
RBC Count          4.8 M/uL   4.5-5.5
"""

In [5]:
# --- INSTRUCTIONS FOR THE AI (Prompt Engineering for Gemini) ---
# Instruction text sent to Gemini together with the raw report text (both are
# passed in the same generate_content() call in the request cell).
# It defines the output contract the formatting code relies on:
#   - a single JSON object with the keys 'patient_details' and 'test_results'
#   - 'patient_details': object with 'name', 'age', 'sex'
#   - 'test_results': array of objects with 'test_name', 'result', 'unit',
#     'reference_range'
#   - missing values are emitted as JSON null, so downstream code must expect
#     None for any of these fields.
system_prompt = """
You are an expert AI system for parsing medical lab reports.
Your task is to extract specific medical information from the provided text.
The output MUST be a valid JSON object. Do not add any text or markdown formatting like ```json before or after the JSON.
The JSON object should have two main keys: 'patient_details' and 'test_results'.

'patient_details' should be an object containing: 'name', 'age', and 'sex'.
'test_results' should be an array of objects. Each object represents a single test and must contain:
- 'test_name': The name of the test (e.g., "Total Cholesterol").
- 'result': The numerical value of the test result. Convert it to a float.
- 'unit': The unit of measurement (e.g., "mg/dL").
- 'reference_range': The reference value or range for the test.

If any value is not found, use null.
"""


In [6]:
# --- SENDING THE MESSAGE TO GEMINI ---


def display_value(value, default='N/A'):
    """Return ``value`` for printing, or ``default`` when it is None.

    The prompt instructs the model to emit JSON null for anything it cannot
    find, so a key can be present with the value None. ``dict.get(key,
    default)`` only applies its default when the key is absent, which would
    let that None through and print the literal text "None".
    """
    return default if value is None else value


def format_report(parsed):
    """Build the human-readable summary lines for a parsed model response.

    Args:
        parsed: The decoded JSON object. It is expected to carry
            'patient_details' (object) and 'test_results' (array of objects);
            either key may be missing or null.

    Returns:
        list[str]: The lines to print, in order. Joining them with a newline
        and printing yields the patient block followed by one line per test,
        or "No test results found." when there are none.
    """
    # `or {}` also covers an explicit null, not just a missing key.
    patient = parsed.get('patient_details') or {}
    lines = [
        "\n\n--- Patient Details ---",
        f"Name: {display_value(patient.get('name'))}",
        f"Age: {display_value(patient.get('age'))}",
        f"Sex: {display_value(patient.get('sex'))}",
        "\n--- Test Results ---",
    ]

    tests = parsed.get('test_results') or []
    if not tests:
        lines.append("No test results found.")
        return lines

    for test in tests:
        name = display_value(test.get('test_name'))
        result = display_value(test.get('result'))
        unit = display_value(test.get('unit'), '')
        lines.append(f"Test: {name}, Result: {result} {unit}")
    return lines


print("--- Sending raw text to Google Gemini AI ---")
try:
    # Ask for a JSON response body so the text can be fed to json.loads().
    model = genai.GenerativeModel(
        'gemini-1.5-flash',
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )

    # Instructions first, then the report text they apply to.
    response = model.generate_content([system_prompt, raw_text_from_report])

    # --- VIEWING THE RESULT ---
    extracted_data_text = response.text
    print("--- ✅ AI has extracted the following structured data (JSON) ---")
    print(extracted_data_text)

    # --- BONUS: Nicely formatted output ---
    parsed_json = json.loads(extracted_data_text)
    print("\n".join(format_report(parsed_json)))

except json.JSONDecodeError as e:
    # The raw text was already printed above; say explicitly that decoding
    # it is what failed.
    print("\n--- ❌ An error occurred ---")
    print(f"Gemini response was not valid JSON: {e}")
except Exception as e:
    # Deliberately broad: any other failure in this cell is reported as a
    # message instead of a traceback.
    print("\n--- ❌ An error occurred ---")
    print(e)

--- Sending raw text to Google Gemini AI ---
--- ✅ AI has extracted the following structured data (JSON) ---
{"patient_details": {"name": "Yashvi M. Patel", "age": 21, "sex": "Female"}, "test_results": [{"test_name": "Total Cholesterol", "result": 150.0, "unit": "mg/dL", "reference_range": "<200 mg/dL"}, {"test_name": "Triglycerides", "result": 100.0, "unit": "mg/dL", "reference_range": "<150 mg/dL"}, {"test_name": "HDL Cholesterol", "result": 50.0, "unit": "mg/dL", "reference_range": ">40 mg/dL"}, {"test_name": "LDL Cholesterol", "result": 80.0, "unit": "mg/dL", "reference_range": "<100 mg/dL"}, {"test_name": "VLDL Cholesterol", "result": 20.0, "unit": "mg/dL", "reference_range": "<30 mg/dL"}, {"test_name": "Hemoglobin", "result": 14.5, "unit": "g/dL", "reference_range": "13.0-17.0"}, {"test_name": "RBC Count", "result": 4.8, "unit": "M/uL", "reference_range": "4.5-5.5"}]}


--- Patient Details ---
Name: Yashvi M. Patel
Age: 21
Sex: Female

--- Test Results ---
Test: Total Cholesterol