In [5]:
# 1. Install core SDKs
!pip install -U google-genai pdf2image

# 2. Imports
import os
import json
from google import genai
from google.genai import types as gtypes
from pydantic import BaseModel, Field

# 3. API Key Setup (Use your sk- or AIzaSy- key here)
os.environ["GEMINI_API_KEY"] = "YOUR_GEMINI_API_KEY"
gemini_api_key = os.environ.get("GEMINI_API_KEY")

print("Environment Ready")

Environment Ready



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
class PracticeLocation(BaseModel):
    address: str = Field(description="Full street address found on the form.")
    phone: str = Field(description="Primary contact phone number.")

class ProviderJSON(BaseModel):
    provider_name: str = Field(description="Full name of the clinician or organization.")
    npi_number: str = Field(description="10-digit National Provider Identifier.")
    specialty: str = Field(description="Primary medical specialty listed.")
    locations: list[PracticeLocation] = Field(description="List of all practice sites found.")

print("Schema defined. This is your target standard.")

Schema defined. This is your target standard.


In [7]:
# --- VLM Extraction Function (Using Gemini 2.5 Flash) ---

def extract_from_pdf_vlm(pdf_path, schema_class, api_key):
    client = genai.Client(api_key=api_key)
    
    # Instructions emphasizing visual parsing and structured output
    prompt = (
        "Analyze the healthcare provider PDF visually. "
        "Extract provider identity and practice information from the form fields. "
        "Format the output strictly as JSON matching the requested schema."
    )

    with open(pdf_path, "rb") as f:
        pdf_bytes = f.read()

    response = client.models.generate_content(
        # Using a highly cost-efficient and performant VLM
        model='gemini-2.5-flash',
        contents=[
            prompt,
            gtypes.Part.from_bytes(data=pdf_bytes, mime_type="application/pdf")
        ],
        config=gtypes.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=schema_class,
            temperature=0.1 # Low temperature for high extraction accuracy
        )
    )
    
    return json.loads(response.text)

print("VLM Agent initialized using Gemini 2.5 Flash.")

VLM Agent initialized using Gemini 2.5 Flash.


In [8]:
# --- Execution for Healthcare Form ---

# Update this variable to match your uploaded PDF filename exactly
PDF_FILE = "main_drug-prior-authorization-form-molina-healthcare.pdf" 

if os.path.exists(PDF_FILE):
    print(f"Vision Agent is analyzing: {PDF_FILE}...")
    
    try:
        # Run the VLM extraction using the schema from Cell 2
        extracted_json = extract_from_pdf_vlm(PDF_FILE, ProviderJSON, gemini_api_key)
        
        # Save the result to a standardized JSON file
        output_file = "molina_provider_data.json"
        with open(output_file, "w") as f:
            json.dump(extracted_json, f, indent=4)
        
        print(f"\n✅ SUCCESS: Data stored in {output_file}")
        
        # Display the result formatted nicely
        print("\n--- EXTRACTED JSON STRUCTURE ---")
        print(json.dumps(extracted_json, indent=4))
        
    except Exception as e:
        print(f"❌ Extraction Error: {e}")
        print("Tip: Ensure your GEMINI_API_KEY is correct and has quota available.")
else:
    print(f"❌ Error: Could not find '{PDF_FILE}'.")
    print("Please click the 'Upload' button in your notebook sidebar and add the PDF.")

Vision Agent is analyzing: main_drug-prior-authorization-form-molina-healthcare.pdf...

✅ SUCCESS: Data stored in molina_provider_data.json

--- EXTRACTED JSON STRUCTURE ---
{
    "provider_name": "",
    "npi_number": "",
    "specialty": "",
    "locations": [
        {
            "address": "",
            "phone": ""
        }
    ]
}
