In [2]:
import google.generativeai as genai
import pathlib
import httpx
import os

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Check if API key is loaded
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("GOOGLE_API_KEY not found in environment variables. Please check your .env file.")

# Configure the API
genai.configure(api_key=api_key)

# Retrieve and encode the PDF byte
filepath = pathlib.Path('Input Data/Adbulla/referral_package.pdf')


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
prompt = """Please analyze this medical document and extract all available information into a structured JSON format. This document may be a referral form, prior authorization (PA) request, medical record, or other healthcare document.

Follow this comprehensive JSON structure, filling in only the fields that are present in the document:
{
    "document_type": "",
    "document_date": "",
    "patient_information": {
        "name": "",
        "date_of_birth": "",
        "age": "",
        "gender": "",
        "address": {
            "street": "",
            "city": "",
            "state": "",
            "zip_code": ""
        },
        "phone": "",
        "email": "",
        "insurance_id": "",
        "member_id": "",
        "group_number": "",
        "policy_number": "",
        "mrn": "",
        "ssn_last_four": "",
        "emergency_contact": ""
    },
    "insurance_information": {
        "primary_insurance": "",
        "secondary_insurance": "",
        "payer_name": "",
        "plan_type": "",
        "effective_date": "",
        "copay": "",
        "deductible": ""
    },
    "healthcare_providers": {
        "requesting_provider": {
            "name": "",
            "title": "",
            "specialty": "",
            "practice_name": "",
            "address": "",
            "phone": "",
            "fax": "",
            "email": "",
            "npi": "",
            "license_number": "",
            "tax_id": ""
        },
        "referring_provider": {
            "name": "",
            "specialty": "",
            "practice_name": "",
            "phone": "",
            "npi": ""
        },
        "facility_information": {
            "name": "",
            "address": "",
            "phone": "",
            "fax": ""
        }
    },
    "clinical_information": {
        "primary_diagnosis": "",
        "secondary_diagnoses": [],
        "icd_10_codes": [],
        "chief_complaint": "",
        "symptoms": [],
        "duration_of_condition": "",
        "severity": "",
        "current_medications": [],
        "medication_dosages": [],
        "allergies": [],
        "allergy_reactions": [],
        "relevant_medical_history": "",
        "family_history": "",
        "social_history": "",
        "surgical_history": "",
        "examination_findings": "",
        "vital_signs": {},
        "lab_results": [],
        "imaging_studies": [],
        "previous_treatments": [],
        "treatment_outcomes": ""
    },
    "requested_services": {
        "service_type": "",
        "procedure_codes": [],
        "cpt_codes": [],
        "hcpcs_codes": [],
        "requested_medication": "",
        "dosage_and_frequency": "",
        "quantity_requested": "",
        "duration_of_therapy": "",
        "place_of_service": "",
        "urgency_level": "",
        "start_date": "",
        "end_date": ""
    },
    "prior_authorization": {
        "pa_number": "",
        "authorization_status": "",
        "approval_date": "",
        "expiration_date": "",
        "approved_services": [],
        "denied_services": [],
        "limitations_or_conditions": [],
        "appeal_information": ""
    },
    "administrative_details": {
        "form_type": "",
        "submission_date": "",
        "urgency": "",
        "rush_request": "",
        "follow_up_required": "",
        "contact_person": "",
        "case_number": "",
        "reference_number": "",
        "authorization_required": "",
        "estimated_cost": ""
    },
    "signatures_and_attestations": {
        "provider_signature": "",
        "provider_signature_date": "",
        "patient_consent": "",
        "patient_signature_date": "",
        "attestation_statements": []
    },
    "additional_information": {
        "supporting_documentation": [],
        "clinical_notes": "",
        "special_instructions": "",
        "handwritten_comments": "",
        "stamps_or_markings": [],
        "attachments_mentioned": [],
        "other_relevant_info": ""
    }
}

Extraction Guidelines:
1. **Document Analysis**: First identify the type of medical document (referral, PA request, medical record, etc.)
2. **Comprehensive Extraction**: Extract ALL visible text, checkboxes, form fields, stamps, and markings
3. **Field Mapping**: Map information to the most appropriate JSON fields, even if the document doesn't use standard terminology
4. **Data Formatting**: 
   - Convert dates to YYYY-MM-DD format when possible
   - Preserve exact spelling and capitalization from original document
   - For arrays, provide complete lists of all items found
5. **Missing Information**: Keep it blank for empty or unclear fields
6. **Clinical Codes**: Extract all medical codes (ICD-10, CPT, HCPCS) with their descriptions
7. **Provider Information**: Capture all healthcare provider details including NPIs, addresses, and contact info
8. **Special Elements**: Include any handwritten notes, signatures, official stamps, or unique markings
9. **Medication Details**: For medication requests, include exact names, dosages, frequencies, and quantities
10. **Authorization Details**: For PA forms, capture all approval/denial information and conditions

Output Requirements:
- Return ONLY valid JSON without markdown formatting
- Include all extracted information even if it doesn't fit perfectly into the structure
- Preserve medical terminology and abbreviations as written
- Do not add interpretations or assumptions beyond what's explicitly stated"""

# Create the model
model = genai.GenerativeModel('gemini-1.5-flash')

# Generate content
response = model.generate_content([
    prompt,
    {
        "mime_type": "application/pdf",
        "data": filepath.read_bytes()
    }
])

print(response.text)

```json
{
  "patient_information": {
    "name": "Shakh Abdulla",
    "date_of_birth": "2001-04-01",
    "gender": "male",
    "address": {
      "street": "425 Sherman Ave APT D",
      "city": "Nashville",
      "state": "TN",
      "zip_code": "37923"
    },
    "phone": "865-395-3958",
    "email": "not specified",
    "insurance_id": "LAJM14345116",
    "mrn": "041152153",
    "ssn_last_four": "8143"
  },
  "referring_provider": {
    "name": "Erfan Rostami",
    "title": "BSN, RN",
    "practice": "Better Life Multiple Sclerosis Center",
    "address": {
      "street": "3320 Montgomery Dr",
      "city": "Nashville",
      "state": "TN",
      "zip_code": "37361"
    },
    "phone": "615-343-1176",
    "fax": "615-343-1219",
    "email": "not specified",
    "npi": "not specified",
    "license_number": "not specified"
  },
  "specialist_referral": {
    "specialty": "Infusion",
    "provider_name": "Golden Gate Infusion Center",
    "practice": "Golden Gate Infusion Center",
  