In [2]:
import google.generativeai as genai
import pathlib
import httpx
import os

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Check if API key is loaded
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("GOOGLE_API_KEY not found in environment variables. Please check your .env file.")

# Configure the API
genai.configure(api_key=api_key)

# Retrieve and encode the PDF byte
filepath = pathlib.Path('Input Data/Adbulla/PA.pdf')


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
prompt = """Please analyze this medical referral package document and extract all available information into a structured JSON format. This document is likely a referral form or referral package that may contain multiple pages with patient information, provider details, clinical notes, and handwritten annotations.

IMPORTANT: This document may contain handwritten text, stamps, signatures, and various form fields. Pay special attention to:
- Handwritten patient names, dates, and clinical notes (may be in cursive, print, or mixed styles)
- Stamped information (dates, provider stamps, approval stamps, fax transmission stamps)
- Checkboxes that may be marked with X, checkmarks, circles, or filled squares
- Form fields that may be filled in by hand, typed, or a combination
- Multiple pages that may contain different types of information
- Partial or incomplete information where forms are only partially filled

EXTRACTION GUIDELINES:
1. If information is not clearly visible or present, use null (not empty strings)
2. Preserve exact text as it appears, including spelling variations
3. For dates, maintain the original format found in the document
4. For checkboxes, note what is marked and what the options were
5. Capture both typed and handwritten content separately when both exist
6. Include any crossed-out or corrected information in notes

REFERRAL PACKAGE SPECIFIC PATTERNS TO LOOK FOR:
- "Referral to:" or "Refer to:" sections with specialist details
- "Reason for referral:" or "Clinical indication:" sections
- Prior authorization numbers or case numbers
- Insurance verification sections
- Appointment scheduling preferences
- Clinical history and examination findings
- Medication lists and allergy information
- Provider contact information and NPIs
- Patient consent forms or signatures

Follow this comprehensive JSON structure, filling in only the fields that are clearly present in the document:

{
    "document_metadata": {
        "document_type": "",
        "form_name": "",
        "form_version": "",
        "total_pages": "",
        "document_date": "",
        "submission_method": "",
        "document_quality": ""
    },
    "patient_information": {
        "name": {
            "full_name": "",
            "first_name": "",
            "last_name": "",
            "middle_initial": "",
            "name_source": ""
        },
        "demographics": {
            "date_of_birth": "",
            "age": "",
            "gender": "",
            "marital_status": "",
            "preferred_language": ""
        },
        "contact_information": {
            "address": {
                "street": "",
                "city": "",
                "state": "",
                "zip_code": "",
                "country": ""
            },
            "phone_numbers": {
                "primary": "",
                "secondary": "",
                "work": "",
                "mobile": ""
            },
            "email": "",
            "emergency_contact": {
                "name": "",
                "relationship": "",
                "phone": ""
            }
        },
        "identifiers": {
            "mrn": "",
            "ssn_last_four": "",
            "patient_id": "",
            "account_number": ""
        }
    },
    "insurance_information": {
        "primary_insurance": {
            "insurance_name": "",
            "member_id": "",
            "group_number": "",
            "policy_number": "",
            "plan_type": "",
            "effective_date": "",
            "expiration_date": "",
            "copay": "",
            "deductible": "",
            "subscriber_name": "",
            "subscriber_relationship": ""
        },
        "secondary_insurance": {
            "insurance_name": "",
            "member_id": "",
            "group_number": "",
            "policy_number": "",
            "plan_type": ""
        },
        "authorization_info": {
            "auth_required": "",
            "auth_number": "",
            "auth_date": "",
            "auth_expiration": ""
        }
    },
    "healthcare_providers": {
        "requesting_provider": {
            "name": "",
            "title": "",
            "specialty": "",
            "practice_name": "",
            "address": "",
            "phone": "",
            "fax": "",
            "email": "",
            "npi": "",
            "license_number": "",
            "tax_id": ""
        },
        "referring_provider": {
            "name": "",
            "specialty": "",
            "practice_name": "",
            "phone": "",
            "npi": "",
            "relationship_to_patient": ""
        },
        "receiving_provider": {
            "name": "",
            "specialty": "",
            "practice_name": "",
            "department": "",
            "address": "",
            "phone": "",
            "fax": "",
            "npi": "",
            "appointment_contact": ""
        },
        "facility_information": {
            "name": "",
            "type": "",
            "address": "",
            "phone": "",
            "fax": "",
            "department": ""
        }
    },
    "referral_details": {
        "referral_information": {
            "referral_type": "",
            "referral_date": "",
            "referral_reason": "",
            "clinical_indication": "",
            "urgency_level": "",
            "appointment_type": "",
            "referral_number": "",
            "case_number": "",
            "follow_up_required": ""
        },
        "requested_services": {
            "service_type": "",
            "specific_procedures": [],
            "procedure_codes": [],
            "cpt_codes": [],
            "hcpcs_codes": [],
            "place_of_service": "",
            "frequency": "",
            "duration": "",
            "start_date": "",
            "end_date": ""
        }
    },
    "clinical_information": {
        "diagnoses": {
            "primary_diagnosis": "",
            "secondary_diagnoses": [],
            "icd_10_codes": [],
            "diagnosis_date": ""
        },
        "presenting_condition": {
            "chief_complaint": "",
            "symptoms": [],
            "symptom_onset": "",
            "duration_of_condition": "",
            "severity_level": "",
            "functional_impact": ""
        },
        "medications": {
            "current_medications": [],
            "dosages": [],
            "frequencies": [],
            "medication_allergies": [],
            "allergy_reactions": []
        },
        "medical_history": {
            "relevant_history": "",
            "previous_treatments": [],
            "treatment_outcomes": "",
            "surgical_history": "",
            "family_history": "",
            "social_history": ""
        },
        "clinical_findings": {
            "examination_findings": "",
            "vital_signs": {},
            "lab_results": [],
            "imaging_studies": [],
            "diagnostic_tests": []
        },
        "assessment_and_plan": {
            "clinical_assessment": "",
            "treatment_plan": "",
            "goals_of_referral": "",
            "expected_outcomes": ""
        }
    },
    "handwritten_content": {
        "handwritten_sections": {
            "patient_information": [],
            "clinical_notes": [],
            "provider_notes": [],
            "appointment_notes": [],
            "other_handwritten": []
        },
        "signatures_and_dates": {
            "provider_signature": "",
            "signature_date": "",
            "patient_signature": "",
            "patient_signature_date": "",
            "witness_signature": ""
        },
        "stamps_and_markings": {
            "date_stamps": [],
            "approval_stamps": [],
            "received_stamps": [],
            "other_stamps": []
        }
    },
    "form_data": {
        "checkboxes_marked": {
            "service_requests": [],
            "urgency_selections": [],
            "appointment_preferences": [],
            "consent_acknowledgments": [],
            "other_selections": []
        },
        "form_fields_completed": [],
        "sections_completed": [],
        "incomplete_sections": []
    },
    "administrative_details": {
        "processing_information": {
            "submission_date": "",
            "received_date": "",
            "processed_date": "",
            "case_number": "",
            "reference_number": "",
            "priority_level": ""
        },
        "approval_status": {
            "status": "",
            "approval_date": "",
            "approved_by": "",
            "expiration_date": "",
            "limitations": ""
        },
        "contact_information": {
            "primary_contact": "",
            "contact_phone": "",
            "contact_email": "",
            "best_time_to_call": ""
        }
    },
    "additional_information": {
        "supporting_documentation": [],
        "special_instructions": "",
        "patient_preferences": "",
        "transportation_needs": "",
        "interpreter_needed": "",
        "attachments_mentioned": [],
        "notes_and_comments": ""
    }
}

FINAL INSTRUCTIONS:
- Return ONLY valid JSON without any markdown formatting or explanatory text
- Use null for any field where information is not clearly present or legible
- Preserve original formatting and spelling from the document
- If handwriting is unclear, note this in the appropriate field with [unclear handwriting]
- For partially legible text, include what you can read followed by [partial]
"""

# Create the model
model = genai.GenerativeModel('gemini-2.0-flash')

# Generate content
response = model.generate_content([
    prompt,
    {
        "mime_type": "application/pdf",
        "data": filepath.read_bytes()
    }
])

# Store the response text
PA_text = response.text
# Print the response text
print("Extracted JSON:")
print(response.text)

Extracted JSON:
```json
{
    "document_metadata": {
        "document_type": "MEDICARE FORM",
        "form_name": "Medication Precertification Request",
        "form_version": "GR-68535-3 (1-25)",
        "total_pages": "5",
        "document_date": null,
        "submission_method": null,
        "document_quality": null
    },
    "patient_information": {
        "name": {
            "full_name": null,
            "first_name": null,
            "last_name": null,
            "middle_initial": null,
            "name_source": null
        },
        "demographics": {
            "date_of_birth": null,
            "age": null,
            "gender": null,
            "marital_status": null,
            "preferred_language": null
        },
        "contact_information": {
            "address": {
                "street": null,
                "city": null,
                "state": null,
                "zip_code": null,
                "country": null
            },
            "p