4. Now trying all 1000+ files:

    Run the upload script (`upload_fhir_files.ps1`) directly in PowerShell 7 (`pwsh`).
    If using the H2 configuration you can restart the container between batches to free memory.
    The `hapi-data` volume preserves the database so uploads will resume where they left off.

In [None]:
import subprocess
import os

# Configure log directory and path.
# NOTE(review): this is a machine-specific network share; adjust it for your
# environment. Because this is a raw string, backslashes are literal: only the
# leading UNC prefix is doubled and every other separator is written once.
log_dir = r"\\Desktop-family\k\self_learning_healthcarebigdata\02_hapi_fhir_synthea_load_powershell"
os.makedirs(log_dir, exist_ok=True)
log_path = os.path.join(log_dir, "upload_log.txt")

# Run the upload script under PowerShell 7 and let Tee-Object mirror its
# output to the console and to the log file at the same time.
ps_command = f"& {{ .\\upload_fhir_files.ps1 -Throttle 4 | Tee-Object -FilePath '{log_path}' }}"
print(f"Running upload script with PowerShell logging to {log_path}")
completed = subprocess.run(["pwsh", "-NoProfile", "-Command", ps_command])

# Surface a failed run instead of silently falling through to the log tail.
if completed.returncode != 0:
    print(f"\nWARNING: upload script exited with code {completed.returncode}")

# Display last 300 lines from log with safe encoding.
# PowerShell 7 writes files as UTF-8 by default; "utf-8-sig" also tolerates a
# BOM, and errors="replace" keeps a stray byte from aborting the read.
print("\nLast 300 log lines:")
if os.path.exists(log_path):
    with open(log_path, "r", encoding="utf-8-sig", errors="replace") as f:
        lines = f.readlines()
    for line in lines[-300:]:
        print(line, end="")
else:
    # The script may have failed before Tee-Object created the file.
    print(f"(no log file was produced at {log_path})")


Running: pwsh -ExecutionPolicy Bypass -File upload_fhir_files.ps1 -Throttle 4. Log will be saved to \\Desktop-family\\k\\self_learning_healthcarebigdata\\02_hapi_fhir_synthea_load_powershell\upload_log.txt


Exception in thread Thread-44 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\evan_\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\Users\evan_\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\evan_\anaconda3\Lib\subprocess.py", line 1599, in _readerthread
    buffer.append(fh.read())
                  ^^^^^^^^^
  File "<frozen codecs>", line 322, in decode
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa1 in position 1970: invalid start byte


TypeError: write() argument must be str, not None

## 4  Validation suite

### 4.1 Resource counts

In [None]:
# 3. Resource counts test
import subprocess, sys

# PowerShell script: ask the FHIR server for a `_summary=count` of each
# resource type and report the totals. The backtick before `?` stops
# PowerShell from treating `?` as part of the variable name `$type`.
ps_script = r"""
$fhirBase = "http://localhost:8080/fhir"
$results = [ordered]@{}

Write-Output "`n3. Checking for existing resources..."
$resourceTypes = @(
    "Patient", "Encounter", "Observation", "Condition",
    "Procedure", "MedicationRequest", "Immunization"
)
$resourceCounts = @{}

foreach ($type in $resourceTypes) {
    try {
        $response = Invoke-RestMethod -Uri "$fhirBase/$type`?_summary=count" -Method Get -TimeoutSec 15
        $count = $response.total
        $resourceCounts[$type] = $count

        if ($count -gt 0) {
            Write-Output "   Found $count $type resources"
        } else {
            Write-Output "No $type resources found"
        }
    } catch {
        $errorMessage = $_.Exception.Message
        $resourceCounts[$type] = "Error: $errorMessage"
        Write-Output "   Error checking $type - $errorMessage"
    }
}

$results["Resource Counts"] = $resourceCounts
"""

print("Running comprehensive HAPI FHIR server health check...")
# errors="replace": with strict decoding, a single undecodable byte in the
# PowerShell output raises UnicodeDecodeError in subprocess's reader thread
# and the captured stdout comes back as None.
result = subprocess.run(["powershell", "-NoLogo", "-NoProfile", "-Command", ps_script],
                        capture_output=True, text=True, errors="replace")

print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr, file=sys.stderr)
    


### 4.2 Test a sample Patient query

In [None]:
# 4. Test a sample Patient query
import subprocess, sys

# PowerShell script: fetch one Patient, then call the `$everything` operation
# on it to see how many resources come back. The backtick in "`$everything"
# keeps PowerShell from expanding it as a variable.
ps_script = r"""
$fhirBase = "http://localhost:8080/fhir"
$results = [ordered]@{}

Write-Output "`n4. Testing Patient query..."
try {
    $patients = Invoke-RestMethod -Uri "$fhirBase/Patient?_count=1" -TimeoutSec 15

    if ($patients.entry -and $patients.entry.Count -gt 0) {
        $patientId = $patients.entry[0].resource.id
        $results["Sample Patient"] = "Found (ID: $patientId)"
        Write-Output "   Successfully retrieved patient with ID: $patientId"

        # Try to get everything for this patient
        try {
            $everything = Invoke-RestMethod -Uri "$fhirBase/Patient/$patientId/`$everything" -TimeoutSec 30
            $linkedResources = $everything.entry.Count - 1  # Subtract 1 for the patient resource itself
            $results["Patient Graph"] = "$linkedResources linked resources"
            Write-Output "   Patient has $linkedResources linked resources"
        } catch {
            $errorMessage = $_.Exception.Message
            $results["Patient Graph"] = " FAILED: $errorMessage"
            Write-Output "   Could not retrieve patient graph: $errorMessage"
        }
    } else {
        $results["Sample Patient"] = "None found"
        Write-Output "  No patients found in database"
    }
} catch {
    $errorMessage = $_.Exception.Message
    $results["Sample Patient"] = " FAILED: $errorMessage"
    Write-Output "   Error retrieving patients: $errorMessage"
}
"""

print("Running comprehensive HAPI FHIR server health check...")
# errors="replace": with strict decoding, a single undecodable byte in the
# PowerShell output raises UnicodeDecodeError in subprocess's reader thread
# and the captured stdout comes back as None.
result = subprocess.run(["powershell", "-NoLogo", "-NoProfile", "-Command", ps_script],
                        capture_output=True, text=True, errors="replace")

print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr, file=sys.stderr)
    


### 4.3 Check database status

In [None]:
# 5. Check database status
import subprocess, sys

# PowerShell script: if the docker CLI is available, take a one-shot
# `docker stats` sample of the container named "hapi" and report its CPU and
# memory figures. The {{...}} placeholders are docker's Go-template syntax
# and are passed through untouched (this is a raw string, not an f-string).
ps_script = r"""
$fhirBase = "http://localhost:8080/fhir"
$results = [ordered]@{}

Write-Output "`n5. Checking database status..."
try {
    # Try to get server status
    if ((Get-Command "docker" -ErrorAction SilentlyContinue)) {
        $stats = docker stats hapi --no-stream --format "{{.CPUPerc}}|{{.MemPerc}}|{{.MemUsage}}"
        $statsArray = $stats -split "\|"
        if ($statsArray.Count -eq 3) {
            $results["CPU Usage"] = $statsArray[0]
            $results["Memory Usage"] = "$($statsArray[1]) ($($statsArray[2]))"
            Write-Output "   Server resources: CPU: $($statsArray[0]), Memory: $($statsArray[1]) ($($statsArray[2]))"
        }
    } else {
        Write-Output " Docker command not available, skipping resource check"
    }
} catch {
    $errorMessage = $_.Exception.Message
    Write-Output " Could not check server resources: $errorMessage"
}
"""

print("Running comprehensive HAPI FHIR server health check...")
# errors="replace": with strict decoding, a single undecodable byte in the
# PowerShell output raises UnicodeDecodeError in subprocess's reader thread
# and the captured stdout comes back as None.
result = subprocess.run(["powershell", "-NoLogo", "-NoProfile", "-Command", ps_script],
                        capture_output=True, text=True, errors="replace")

print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr, file=sys.stderr)


In [None]:
import subprocess, sys

# PowerShell script: produce the overall OPERATIONAL / NOT OPERATIONAL
# summary. Every cell launches its own PowerShell process, so `$results`
# starts out empty here; the connectivity probe is therefore performed in
# this script rather than read from an earlier cell's state.
ps_script = r"""
$fhirBase = "http://localhost:8080/fhir"
$results = [ordered]@{}

# Basic connectivity: the capability statement is served at [base]/metadata
try {
    $null = Invoke-RestMethod -Uri "$fhirBase/metadata" -Method Get -TimeoutSec 15
    $results["Basic Connectivity"] = "OK"
} catch {
    $results["Basic Connectivity"] = "FAILED: $($_.Exception.Message)"
}

# 6. Overall status
$overallStatus = if ($results["Basic Connectivity"] -like "OK*") { "OPERATIONAL" } else { "NOT OPERATIONAL" }
$results["Overall Status"] = $overallStatus

Write-Output "`n========== SUMMARY =========="
Write-Output "HAPI FHIR Server status: $overallStatus"

# Return the results object for further processing
$results | ConvertTo-Json -Depth 3
"""

print("Running comprehensive HAPI FHIR server health check...")
# errors="replace": with strict decoding, a single undecodable byte in the
# PowerShell output raises UnicodeDecodeError in subprocess's reader thread
# and the captured stdout comes back as None.
result = subprocess.run(["powershell", "-NoLogo", "-NoProfile", "-Command", ps_script],
                        capture_output=True, text=True, errors="replace")

print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr, file=sys.stderr)

### 4.4 File‑to‑server parity

In [None]:
import subprocess, textwrap, sys

# PowerShell script: query `_summary=count` for each listed resource type,
# accumulate a grand total, and emit the per-type counts as JSON.
# A type whose request fails is reported on the console and recorded as 0,
# so a 0 in the JSON may mean "error" rather than "no resources".
# The backtick before `?` stops PowerShell from treating `?` as part of the
# variable name `$type`.
ps_script = textwrap.dedent(r"""
    $FHIR_BASE = "http://localhost:8080/fhir/"
    Write-Output "Retrieving resource counts via standard search..."

    # Common FHIR resource types in Synthea data
    $resourceTypes = @(
        'Patient', 'Practitioner', 'Organization',
        'Encounter', 'Condition', 'Observation',
        'Procedure', 'MedicationRequest', 'Immunization',
        'AllergyIntolerance', 'CarePlan', 'DiagnosticReport',
        'Goal', 'Medication'
    )

    $resourceCounts = [ordered]@{}
    $total = 0

    foreach ($type in $resourceTypes) {
        try {
            Write-Output "Checking $type resources..."
            $response = Invoke-RestMethod -Uri "$FHIR_BASE$type`?_summary=count" -Method Get -TimeoutSec 30
            $count = $response.total
            $resourceCounts[$type] = $count
            $total += $count
            Write-Output "  -> Found $count $type resources"
        } catch {
            $errorMessage = $_.Exception.Message
            Write-Output "  -> Error checking $type - $errorMessage"
            $resourceCounts[$type] = 0
        }
    }

    $resourceCounts["TOTAL"] = $total
    Write-Output "`nTOTAL RESOURCES: $total"

    # Output JSON summary
    $resourceCounts | ConvertTo-Json -Depth 2
""")

print("Retrieving resource counts from server...")
# errors="replace": with strict decoding, a single undecodable byte in the
# PowerShell output raises UnicodeDecodeError in subprocess's reader thread
# and the captured stdout comes back as None.
result = subprocess.run(["powershell", "-NoLogo", "-NoProfile", "-Command", ps_script],
                        capture_output=True, text=True, errors="replace")

print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr, file=sys.stderr)

### 4.5 Spot‑check a patient graph

In [None]:
import subprocess

# Define PowerShell script to check one patient's data.
# It takes the first Patient returned by the server (not a random draw) and
# calls `$everything` on it. The bundle is checked for entries first so an
# empty server yields a clear message instead of a null-index error.
# The backtick in "`$everything" keeps PowerShell from expanding it.
ps_script = r"""
$FHIR_BASE = "http://localhost:8080/fhir/"
$bundle = Invoke-RestMethod -Uri ($FHIR_BASE + "Patient?_count=1") -TimeoutSec 15
if ($bundle.entry) {
    $id = $bundle.entry[0].resource.id
    Write-Output "Random Patient ID = $id"
    $everything = Invoke-RestMethod -Uri ($FHIR_BASE + "Patient/$id/`$everything") -TimeoutSec 30
    Write-Output ("Resources linked to patient: " + $everything.entry.Count)
} else {
    Write-Output "No patients found in database"
}
"""

# Execute PowerShell script.
# errors="replace": with strict decoding, a single undecodable byte in the
# PowerShell output raises UnicodeDecodeError in subprocess's reader thread
# and the captured stdout comes back as None.
result = subprocess.run(["powershell", "-NoLogo", "-NoProfile", "-Command", ps_script],
                        capture_output=True, text=True, errors="replace")
print(result.stdout)
if result.stderr:
    print("Error:", result.stderr)

## 5  Next steps
* Enable `$export` and time how long NDJSON generation takes.
* Switch persistence to PostgreSQL for multi‑million‑resource load.
* Layer on SMART‑on‑FHIR or OAuth if you need auth.

---
*Notebook generated automatically from our ChatGPT session (June 2025).*