In [1]:
import google.generativeai as genai
import pathlib
import httpx
import os

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Check if API key is loaded
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("GOOGLE_API_KEY not found in environment variables. Please check your .env file.")

# Configure the API
genai.configure(api_key=api_key)

# Retrieve and encode the PDF byte
filepath = pathlib.Path('Input Data/Adbulla/PA.pdf')


In [2]:
prompt = """You are analyzing a Prior Authorization (PA) form PDF to understand its structure and extract form field information for automated filling.

CONTEXT: This is part of a healthcare automation pipeline where PA forms need to be filled using information from referral packages. PA forms are structured PDFs that may contain fillable form widgets (AcroForm fields) or be image-based forms.

TASK: Extract and analyze the PA form structure to identify:
1. All form fields that need to be filled
2. Field types (text, checkbox, dropdown, etc.)
3. Required vs optional fields
4. Conditional logic and field dependencies
5. Mutually exclusive options
6. Branching sections based on selections

IMPORTANT CONSIDERATIONS:
- PA forms contain mutually exclusive options (e.g., "New Patient" vs "Existing Patient")
- Some sections are conditional and only relevant based on previous answers
- Not every field should be filled - only appropriate fields based on patient situation
- Focus on identifying fillable form widgets and their validation rules

ANALYSIS STRUCTURE - Return as JSON:
{
    "form_metadata": {
        "form_type": "Prior Authorization",
        "drug_name": null,
        "insurance_company": null,
        "form_version": null,
        "total_pages": null,
        "has_fillable_widgets": null,
        "form_complexity": null
    },
    "form_fields": {
        "patient_information": {
            "required_fields": [],
            "optional_fields": [],
            "field_types": {},
            "validation_rules": {}
        },
        "provider_information": {
            "required_fields": [],
            "optional_fields": [],
            "field_types": {},
            "validation_rules": {}
        },
        "insurance_information": {
            "required_fields": [],
            "optional_fields": [],
            "field_types": {},
            "validation_rules": {}
        },
        "clinical_information": {
            "required_fields": [],
            "optional_fields": [],
            "field_types": {},
            "validation_rules": {}
        },
        "medication_details": {
            "required_fields": [],
            "optional_fields": [],
            "field_types": {},
            "validation_rules": {}
        },
        "medical_necessity": {
            "required_fields": [],
            "optional_fields": [],
            "field_types": {},
            "validation_rules": {}
        }
    },
    "form_logic": {
        "conditional_fields": {},
        "mutually_exclusive_groups": [],
        "branching_sections": {},
        "required_attachments": []
    },
    "fillable_widgets": {
        "text_fields": [],
        "checkboxes": [],
        "radio_buttons": [],
        "dropdowns": [],
        "signature_fields": [],
        "date_fields": []
    },
    "completion_criteria": {
        "minimum_required_fields": [],
        "completion_percentage_estimate": null,
        "critical_missing_fields": []
    }
}

EXTRACTION GUIDELINES:
1. Focus on identifying actual fillable form fields, not just text content
2. Note field names, types, and any visible validation requirements
3. Identify sections that are mutually exclusive
4. Look for conditional logic indicators (e.g., "If yes, complete section B")
5. Distinguish between required fields (marked with *) and optional fields
6. Identify any pre-filled or locked fields
7. Note any signature requirements or date fields

Return ONLY the JSON structure without markdown formatting."""

# Create the model
model = genai.GenerativeModel('gemini-2.0-flash')

# Generate content
response = model.generate_content([
    prompt,
    {
        "mime_type": "application/pdf",
        "data": filepath.read_bytes()
    }
])

# Store the response text
PA_form_analysis = response.text
# Print the response text
print("PA Form Analysis:")
print(response.text)

PA Form Analysis:
```json
{
    "form_metadata": {
        "form_type": "Prior Authorization",
        "drug_name": "Riabni, Rituxan, Ruxience, Truxima",
        "insurance_company": "Aetna Medicare",
        "form_version": "GR-68535-3 (1-25)",
        "total_pages": 5,
        "has_fillable_widgets": true,
        "form_complexity": "High"
    },
    "form_fields": {
        "patient_information": {
            "required_fields": [
                "First Name",
                "Last Name",
                "Address",
                "City",
                "State",
                "ZIP",
                "DOB"
            ],
            "optional_fields": [
                "Home Phone",
                "Work Phone",
                "Cell Phone",
                "E-mail",
                "Current Weight",
                "Height",
                "Allergies"
            ],
            "field_types": {
                "First Name": "text",
                "Last Name": "text",
           

In [12]:
import os
from mistralai import Mistral
from dotenv import load_dotenv

load_dotenv()
client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

uploaded = client.files.upload(
    file={"file_name": "referral_package.pdf", "content": open('Input Data/Adbulla/referral_package.pdf', "rb")},
    purpose="ocr"
)
signed = client.files.get_signed_url(file_id=uploaded.id)

ocr = client.ocr.process(
    model="mistral-ocr-latest",
    document={"type": "document_url", "document_url": signed.url},
    include_image_base64=False
)

# Save structured content in a variable
referral_package_text = ""
for pg in ocr.pages:
    referral_package_text += pg.markdown + "\n\n"

# Print structured content
for pg in ocr.pages:
    print(pg.markdown)


04/22/2024 WED 11:32 FAX

001/028

# FAX TRANSMISSION

**Better Life Multiple Sclerosis Center**

3320 Montgomery Dr. Nashville, TN 37361

**BetterLife**

F 615-562-4820 P: 615-562-4848

Dr. Asriel Han | Dr. Aetya Shan

---

**TO:**

Golden Gate Infusion Center

**Fax:** 614-225-3355 **Phone:** 614-295-7655

**From:** Erran Rostami, BSN, RN

**P:** 615-343-1176

**F:** 615-343-1219

---

**Page**

(including cover sheet)

---

**Comments:**

- Arabic - spoken / English - written
- Rituxen (Truxima) TP
- MRI Reports
- Hospital DIC Make
- Demographics

---

The documents accompanying this transmission may contain health information that is legally protected. This information is intended only for the use of the individual or entity named above. The authorized recipient of this information is prohibited from disclosing this information to any other party unless permitted by law or regulation.

If you are not the intended recipient, you are hereby notified that any use, disclosure, copying 

In [13]:
# Enhanced prompt for referral package analysis
referral_prompt = """You are analyzing a referral package (collection of scanned medical documents) to extract patient information that will be used to fill a Prior Authorization form.

CONTEXT: This referral package contains scanned documents like insurance cards, medical history notes, test results, and other supporting documentation. The extracted information will be mapped to specific fields in a PA form.

EXTRACTION FOCUS: Extract all available information that could be relevant for PA form completion, including:

PATIENT DEMOGRAPHICS:
- Full name, DOB, gender, address, phone numbers
- Patient ID numbers, MRN, account numbers
- Emergency contacts and relationships

INSURANCE INFORMATION:
- Insurance company name and plan details
- Member ID, group number, policy number
- Subscriber information and relationship to patient
- Coverage details and effective dates

CLINICAL INFORMATION:
- Primary and secondary diagnoses with ICD codes
- Current medications, dosages, and frequencies
- Allergies and adverse reactions
- Vital signs and lab results
- Previous treatments and outcomes
- Medical history and comorbidities

PROVIDER INFORMATION:
- Referring physician name, specialty, and contact info
- Practice name and address
- NPI numbers and license information
- Facility details where treatment will occur

TREATMENT DETAILS:
- Requested medication/procedure/device
- Dosage, frequency, duration
- Medical necessity justification
- Previous treatment failures
- Clinical criteria met for approval

SUPPORTING EVIDENCE:
- Lab results supporting diagnosis
- Imaging studies and results
- Functional assessments
- Specialist consultations
- Treatment response documentation

Return structured JSON with all extracted information, using null for missing data."""

# Extract structured data from referral package
model = genai.GenerativeModel('gemini-2.0-flash')
referral_response = model.generate_content([
    referral_prompt,
    f"REFERRAL PACKAGE OCR TEXT:\n{referral_package_text}"
])

referral_data = referral_response.text
print("Referral Package Data:")
print(referral_data)

Referral Package Data:
```json
{
  "PATIENT_DEMOGRAPHICS": {
    "full_name": "Shakh Abdulla",
    "dob": "04/01/2001",
    "gender": "male",
    "address": "425 Sherman Ave\nAPT D\nNashville TN 37995, 8327 BROADWAY LN\nAPT D\nKNOXVILLE, TN 37923",
    "phone_numbers": {
      "home": "865-395-3958",
      "mobile": "865-395-0481"
    },
    "patient_id_numbers": {
      "mrn": "041152153, 048152153",
      "care_everywhere_id": "VDJ-TKR2-484T-LGF8"
    },
    "emergency_contacts": [
      {
        "name": "Sina, Amin",
        "relationship": "Mother",
        "phone": null
      },
      {
        "name": "Mohammedraza, Musiala",
        "relationship": null,
        "phone": null
      }
    ],
    "account_number": null
  },
  "INSURANCE_INFORMATION": {
    "insurance_company_name": "BC TENNCARE, TC BLUE CARE NO COPAY",
    "plan_details": "TC BLUE CARE NO COPAY",
    "member_id": "LAJM14345116",
    "group_number": "435000",
    "policy_number": null,
    "subscriber_information"

In [None]:
filling_prompt = """You are creating a form-filling strategy for a Prior Authorization form based on available patient data from a referral package.

TASK: Analyze the PA form structure and referral package data to create a mapping strategy that determines:
1. Which fields can be filled with available data
2. Which fields should remain blank (due to conditional logic or missing data)
3. How to handle mutually exclusive options
4. What information is missing and required

FORM FILLING RULES:
1. Only fill fields for which you have clear, accurate data
2. Respect mutually exclusive options (e.g., don't check both "New Patient" AND "Existing Patient")
3. Follow conditional logic (e.g., only fill dependent sections if trigger conditions are met)
4. Prioritize required fields over optional fields
5. Use exact text matching where possible
6. Format dates, phone numbers, and other data according to field requirements

MAPPING STRATEGY:
- Map referral package data to specific PA form fields
- Identify any data transformations needed
- Note confidence levels for each mapping
- Flag any ambiguous or unclear mappings

QUALITY ASSURANCE:
- Verify logical consistency of filled fields
- Ensure no conflicting information is entered
- Validate that conditional sections are appropriately completed
- Check that all required fields have been addressed

Return a detailed filling strategy as JSON:
{
    "filling_strategy": {
        "field_mappings": {},
        "conditional_logic_applied": {},
        "mutually_exclusive_selections": {},
        "data_transformations": {},
        "confidence_scores": {}
    },
    "completion_analysis": {
        "fillable_fields_count": null,
        "filled_fields_count": null,
        "completion_percentage": null,
        "critical_missing_fields": [],
        "optional_missing_fields": []
    },
    "form_values": {
        "patient_information": {},
        "provider_information": {},
        "insurance_information": {},
        "clinical_information": {},
        "medication_details": {},
        "medical_necessity": {}
    },
    "missing_information_report": {
        "required_but_missing": [],
        "recommended_but_missing": [],
        "could_not_determine": [],
        "data_quality_issues": []
    }
}"""

# Create form filling strategy
model = genai.GenerativeModel('gemini-2.0-flash')

strategy_response = model.generate_content([
    filling_prompt,
    f"PA FORM ANALYSIS:\n{PA_form_analysis}",
    f"REFERRAL PACKAGE DATA:\n{referral_data}"
])

filling_strategy = strategy_response.text
print("Form Filling Strategy:")
print(filling_strategy)