In [1]:
from flask import Flask, request, jsonify
import jsonschema
from jsonschema import validate, ValidationError
import requests
import logging
import json
import threading
import time
import openai
from typing import Dict, Any, Optional
import socket
from dotenv import load_dotenv
from openai import OpenAI
import requests
import os
import random
from faker import Faker
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from collections import Counter
import time
from tqdm import tqdm
from datetime import datetime

In [2]:
# Directory that holds the project's `.env` file.
# The RESCUE_INSURANCE_DIR environment variable overrides the location so the
# notebook is not tied to one machine; when it is unset, the original
# hard-coded directory is used, so existing setups behave exactly as before.
path = os.environ.get('RESCUE_INSURANCE_DIR', '/home/cptaswadu/RESCUE-n8n/insurance')
load_dotenv(dotenv_path=os.path.join(path, '.env'))

# The key is read from the variable named OPEN_AI_API_KEY (with underscores).
# os.getenv returns None when it is not defined.
openai_api_key = os.getenv("OPEN_AI_API_KEY")
gpt_client = OpenAI(api_key=openai_api_key)

In [3]:
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
# Logger for this notebook/module, named after __name__.
logger = logging.getLogger(__name__)

In [4]:
# Field layout of the CT Medicaid WES request form.
#
# Two notations are mixed in this dict:
#   * flat sections (member, providers, attestation) map each field name to a
#     placeholder type string such as "string" or "string (YYYY-MM-DD)";
#   * the remaining sections use JSON-Schema-like fragments
#     ("type", "items"/"enum", "properties", "description").
CT_Medicaid_WES = {
    # --- Member demographics and visit details ---
    "Member_Information": {
        "Member_ID": "string",
        "Last_Name": "string",
        "First_Name": "string",
        "DOB": "string (YYYY-MM-DD)",
        "Age": "integer",
        "Address": "string",
        "City": "string",
        "State": "string",
        "Zip": "string",
        "Primary_Diagnosis": "string",
        "Date_of_Service": "string (YYYY-MM-DD)",
    },

    # --- Requested test codes ---
    "Requested_Tests": {
        "type": "array",
        "items": {
            "enum": ["81415", "81416", "81417", "81425", "81426", "81427"],
        },
    },

    # --- Why the test is being ordered ---
    "Rationale_for_testing": {
        "type": "object",
        "properties": {
            "Selected_Reasons": {
                "type": "array",
                "items": {
                    "enum": [
                        "Unexplained congenital or neurodevelopmental disorder(s)",
                        "Epilepsy/seizure disorder",
                        "Moderate to severe intellectual disability",
                        "Multiple genetic anomalies",
                        "Congenital heart disease",
                        "Other",
                    ],
                },
                "description": "Select all applicable reasons for testing.",
            },
            "Congenital_Heart_Disease_Type": {
                "type": "string",
                "description": "Required if 'Congenital heart disease' is selected.",
            },
            "Other_Reason": {
                "type": "string",
                "description": "Required if 'Other' is selected.",
            },
        },
        "description": "Provide rationale for ordering WES. Conditional fields apply based on selected reasons.",
    },

    "Prenatal_Testing": {
        "type": "boolean",
        "description": "Is WES being ordered for prenatal testing of a fetus?",
    },

    # --- Earlier genetic work-up ---
    "Prior_Genetic_Testing": {
        "type": "object",
        "properties": {
            "Has_Prior_Testing": {
                "type": "boolean",
                "description": "Has other genetic testing been performed?",
            },
            "Test_Types": {
                "type": "array",
                "items": {
                    "enum": [
                        "Microarray",
                        "Chromosome/FISH analysis",
                        "Single gene testing",
                        "Targeted panel testing",
                        "Other",
                    ],
                },
                "description": "Select all prior tests that have been performed. Required if 'Has_Prior_Testing' is true.",
            },
            "Other_Description": {
                "type": "string",
                "description": "Required if 'Other' is selected.",
            },
        },
        "description": "Information on previous genetic testing performed.",
    },

    "Well_Described_Syndrome": {
        "type": "boolean",
        "description": "Does the clinical picture fit a well-described syndrome?",
    },

    # --- Reanalysis-only section ---
    "WES_Reanalysis": {
        "type": "object",
        "properties": {
            "New_Gene_Reported": {
                "type": "boolean",
                "description": "Have new gene(s) or gene functions been reported in the literature that relate to the patient’s phenotype?",
            },
            "New_Gene_Reported_Description": {
                "type": "string",
                "description": "If yes, describe and attach supporting literature.",
            },
            "New_Symptom_Onset": {
                "type": "boolean",
                "description": "Has there been an onset of new symptoms that broadens the phenotype assessed during the original exome evaluation?",
            },
            "New_Symptom_Description": {
                "type": "string",
                "description": "If yes, describe and attach supporting documentation.",
            },
        },
        "description": "Complete this section only if requesting a WES reanalysis.",
    },

    # --- Provider contact blocks (same nine fields, different name key) ---
    "Billing_Provider_Info": {
        "Medicaid_Billing_Number": "string",
        "Billing_Provider_Name": "string",
        "Street_Address": "string",
        "City": "string",
        "State": "string",
        "Zip": "string",
        "Phone_Number": "string",
        "Fax_Number": "string",
        "Contact_Name": "string",
    },
    "Ordering_Provider_Info": {
        "Medicaid_Billing_Number": "string",
        "Ordering_Provider_Name": "string",
        "Street_Address": "string",
        "City": "string",
        "State": "string",
        "Zip": "string",
        "Phone_Number": "string",
        "Fax_Number": "string",
        "Contact_Name": "string",
    },

    # --- Sign-off ---
    "Attestation": {
        "Physician_Signature": "string",
        "Date": "string (YYYY-MM-DD)",
    },
}

In [5]:
fake = Faker('en_US')


def generate_synthetic_patients(num_samples: int = 50, seed: Optional[int] = None) -> list:
    """Generate synthetic patients for the CT Medicaid WES form based on a clinical mapping.

    Each patient is built from one randomly chosen entry of ``clinical_cases``
    (reason for testing, CPT code, candidate prior tests, candidate diagnoses)
    plus Faker-generated demographics and provider details.

    Args:
        num_samples: Number of patient records to generate.
        seed: Optional seed. When given, both the ``random`` module and the
            module-level ``fake`` instance are seeded before generation so a
            run can be repeated. Dates are drawn relative to the current date
            ('today', '-Ny'), so date fields can still differ between days.
            When ``None`` (default) no seeding is done, as before.

    Returns:
        A list of ``num_samples`` dicts. Top-level keys follow the sections of
        ``CT_Medicaid_WES`` plus an extra ``Syndrome_Information`` entry.

    Side effects:
        Prints one progress line per generated patient.
    """
    # Local import: ``copy`` is not among the notebook's top-level imports.
    from copy import deepcopy

    if seed is not None:
        random.seed(seed)
        fake.seed_instance(seed)

    # Mapping table: reason for testing -> clinical scenario, candidate prior
    # tests, CPT code and candidate primary diagnoses.
    clinical_cases = [
        {
            "reason": "Unexplained congenital or neurodevelopmental disorder(s)",
            "case": "Global developmental delay",
            "prior_tests": ["Microarray", "Single gene testing"],
            "cpt_code": "81415",
            "diagnoses": ["Global developmental delay", "Developmental regression"]
        },
        {
            "reason": "Epilepsy/seizure disorder",
            "case": "Epileptic encephalopathy",
            "prior_tests": ["Targeted panel testing", "Microarray"],
            "cpt_code": "81417",
            "diagnoses": ["Epileptic encephalopathy", "Seizures"]
        },
        {
            "reason": "Moderate to severe intellectual disability",
            "case": "Intellectual disability",
            "prior_tests": ["Microarray", "Single gene testing"],
            "cpt_code": "81416",
            "diagnoses": ["Intellectual disability", "Delayed milestones"]
        },
        {
            "reason": "Multiple genetic anomalies",
            "case": "Multiple congenital anomalies",
            "prior_tests": ["Microarray", "Chromosome/FISH analysis"],
            "cpt_code": "81425",
            "diagnoses": ["Multiple congenital anomalies", "Dysmorphic features"]
        },
        # Other cases
        {
            "reason": "Other",
            "case": "Multiple congenital contractures of unknown etiology",
            "other_description": "Multiple congenital contractures of unknown etiology; CMA negative",
            "prior_tests": ["Chromosome/FISH analysis", "Microarray"],
            "cpt_code": "81425",
            "diagnoses": ["Arthrogryposis", "Multiple congenital contractures", "Congenital myopathy"]
        },
        {
            "reason": "Other",
            "case": "Severe non-immune fetal hydrops with negative karyotype and CMA",
            "other_description": "Severe non-immune fetal hydrops with negative karyotype and CMA; no maternal alloimmunization",
            # Was "Karyotype", which is not one of the Test_Types options of the
            # form; a karyotype is recorded under "Chromosome/FISH analysis".
            "prior_tests": ["Chromosome/FISH analysis", "Microarray"],
            "cpt_code": "81417",
            "diagnoses": ["Non-immune hydrops fetalis", "Fetal hydrops", "Congenital anomalies"]
        },
        {
            "reason": "Other",
            "case": "Neonatal Hypotonia",
            "other_description": "Severe hypotonia with negative metabolic and genetic screening; syndromic cause suspected",
            "prior_tests": ["Single gene testing", "Microarray"],
            "cpt_code": "81415",
            "diagnoses": ["Neonatal hypotonia", "Floppy infant syndrome", "Congenital myopathy"]
        },
        {
            "reason": "Other",
            "case": "Mild ADHD",
            "other_description": "Mild attention deficit with hyperactivity; clinical observation priority",
            "prior_tests": [],
            "cpt_code": "81426",
            "diagnoses": ["Attention deficit hyperactivity disorder", "Behavioral problems", "Developmental delay"]
        },
        {
            "reason": "Other",
            "case": "Isolated speech delay",
            "other_description": "Isolated speech delay without other developmental concerns",
            "prior_tests": [],
            "cpt_code": "81426",
            "diagnoses": ["Speech delay", "Language disorder", "Communication disorder"]
        },

        # Congenital heart disease cases
        {
            "reason": "Congenital heart disease",
            "case": "Tetralogy of Fallot",
            "heart_disease_type": "Tetralogy of Fallot",
            "prior_tests": ["Chromosome/FISH analysis", "Microarray"],
            "cpt_code": "81425",
            "diagnoses": ["Tetralogy of Fallot", "Congenital heart disease", "Cyanotic heart disease"]
        },
        {
            "reason": "Congenital heart disease",
            "case": "Atrial septal defect",
            "heart_disease_type": "Atrial septal defect",
            "prior_tests": [],
            "cpt_code": "81426",
            "diagnoses": ["Atrial septal defect", "Congenital heart disease", "Acyanotic heart disease"]
        },
        {
            "reason": "Congenital heart disease",
            "case": "Extracardiac anomalies",
            "heart_disease_type": "Complex congenital heart disease with extracardiac anomalies",
            "prior_tests": ["Chromosome/FISH analysis", "Microarray"],
            "cpt_code": "81415",
            "diagnoses": ["Complex congenital heart disease", "Multiple congenital anomalies", "Syndromic heart disease"]
        },
        {
            "reason": "Congenital heart disease",
            "case": "Small isolated VSD",
            "heart_disease_type": "Small isolated ventricular septal defect",
            "prior_tests": [],
            # No CPT code: this case yields an empty Requested_Tests list.
            "cpt_code": None,
            "diagnoses": ["Ventricular septal defect", "Small VSD", "Congenital heart disease"]
        },
        {
            "reason": "Congenital heart disease",
            "case": "Anatomical abnormality (complex)",
            "heart_disease_type": "Complex anatomical heart abnormality",
            "prior_tests": ["Chromosome/FISH analysis", "Microarray"],
            "cpt_code": "81415",
            "diagnoses": ["Complex congenital heart disease", "Anatomical heart abnormality", "Structural heart disease"]
        }
    ]

    # Gene findings used to fill the WES_Reanalysis description fields.
    gene_findings = [
        {
            "Gene": "SYNGAP1",
            "Associated_Symptoms": [
                "Global developmental delay",
                "Moderate to severe intellectual disability",
                "Autism spectrum disorder",
                "Seizures"
            ],
            "Phenotypic_Notes": "Often presents with ID and epilepsy; new literature links to ASD spectrum."
        },
        {
            "Gene": "SCN1A",
            "Associated_Symptoms": [
                "Epileptic encephalopathy",
                "Recurrent seizures",
                "Developmental regression"
            ],
            "Phenotypic_Notes": "Associated with Dravet syndrome and early-onset epilepsy."
        },
        {
            "Gene": "KCNQ2",
            "Associated_Symptoms": [
                "Neonatal hypotonia",
                "Seizures",
                "Developmental delay"
            ],
            "Phenotypic_Notes": "Early infantile epileptic encephalopathy with variable hypotonia."
        },
        {
            "Gene": "COL4A1",
            "Associated_Symptoms": [
                "Congenital hypotonia",
                "Brain malformations",
                "Seizures",
                "Developmental delay"
            ],
            "Phenotypic_Notes": "Can cause periventricular leukomalacia, stroke-like episodes, and muscle tone issues."
        },
        {
            "Gene": "SLC2A1",
            "Associated_Symptoms": [
                "Developmental delay",
                "Seizures",
                "Movement disorders"
            ],
            "Phenotypic_Notes": "Linked to GLUT1 deficiency; symptoms improve with ketogenic diet."
        },
        {
            "Gene": "TSC2",
            "Associated_Symptoms": [
                "Seizures",
                "Cortical tubers",
                "Skin findings",
                "Developmental delay"
            ],
            "Phenotypic_Notes": "Tuberous sclerosis; often diagnosed after onset of epilepsy and skin findings."
        },
        {
            "Gene": "CDKL5",
            "Associated_Symptoms": [
                "Developmental regression",
                "Epileptic encephalopathy",
                "Microcephaly"
            ],
            "Phenotypic_Notes": "Early-onset epilepsy with poor developmental outcomes; X-linked dominant."
        },
        {
            "Gene": "DCHS1",
            "Associated_Symptoms": [
                "Congenital heart disease",
                "Structural cardiac malformations"
            ],
            "Phenotypic_Notes": "Associated with mitral valve prolapse and later-onset cardiomyopathy."
        },
        {
            "Gene": "FOXP1",
            "Associated_Symptoms": [
                "Expressive language delay",
                "Mild intellectual disability",
                "Autistic features"
            ],
            "Phenotypic_Notes": "Disruption causes neurodevelopmental delay with language involvement."
        }
    ]

    syndrome_info = [
        {"Syndrome_Name": "Noonan Syndrome Spectrum"},
        {"Syndrome_Name": "Marfan Syndrome"},
        {"Syndrome_Name": "Joubert Syndrome"},
        {"Syndrome_Name": "Down Syndrome"},
        {"Syndrome_Name": "DiGeorge Syndrome"},
        {"Syndrome_Name": "Fragile X Syndrome"}
    ]

    def _phone():
        # Faker phone number with the parentheses stripped.
        return fake.phone_number().replace('(', '').replace(')', '')

    def _provider_info(name_key):
        # Billing and ordering providers share one nine-field layout; only the
        # key holding the provider name differs.
        return {
            "Medicaid_Billing_Number": str(fake.random_number(digits=10, fix_len=True)),
            name_key: f"Dr. {fake.name()}",
            "Street_Address": fake.street_address(),
            "City": fake.city(),
            "State": fake.state_abbr(),
            "Zip": fake.zipcode(),
            "Phone_Number": _phone(),
            "Fax_Number": _phone(),
            "Contact_Name": fake.name()
        }

    patients = []

    for i in range(num_samples):
        # Demographic dates: DOB falls in the year window implied by `age`;
        # service date lies between DOB and today; attestation on/after service.
        age = random.randint(0, 18)
        dob = fake.date_between(start_date=f'-{age+1}y', end_date=f'-{age}y')
        service_date = fake.date_between(start_date=dob, end_date='today')
        attestation_date = fake.date_between(start_date=service_date, end_date='today')

        # Random case selection
        selected_case = random.choice(clinical_cases)
        candidate_tests = selected_case["prior_tests"]
        selected_prior_test = random.choice(candidate_tests) if candidate_tests else None
        has_prior_testing = selected_prior_test is not None
        selected_diagnosis = random.choice(selected_case["diagnoses"])
        cpt_code = selected_case["cpt_code"]

        # Syndrome info. Copies are stored so that patients never share the
        # same dict object: editing one record must not change another.
        syndrome_present = random.choice([True, False])
        selected_syndrome = deepcopy(random.choice(syndrome_info)) if syndrome_present else {}

        # Prenatal testing is only considered for patients with age 0.
        prenatal_testing = age == 0 and random.choice([True, False])

        # Reanalysis flags and their descriptions.
        # NOTE(review): the form layout declares both descriptions as strings,
        # but a gene-finding dict is emitted here; form_to_free_text reads
        # New_Gene_Reported_Description as a dict, so the shape is kept.
        new_gene = random.choice([True, False])
        new_gene_description = deepcopy(random.choice(gene_findings)) if new_gene else ""
        new_symptom = random.choice([True, False])
        new_symptom_description = deepcopy(random.choice(gene_findings)) if new_symptom else ""

        patient = {
            "Member_Information": {
                "Member_ID": str(fake.random_number(digits=9, fix_len=True)),
                "Last_Name": fake.last_name(),
                "First_Name": fake.first_name(),
                "DOB": dob.strftime("%Y-%m-%d"),
                "Age": age,
                "Address": fake.street_address(),
                "City": fake.city(),
                "State": fake.state_abbr(),
                "Zip": fake.zipcode(),
                "Primary_Diagnosis": selected_diagnosis,
                "Date_of_Service": service_date.strftime("%Y-%m-%d")
            },

            "Requested_Tests": [cpt_code] if cpt_code else [],

            "Rationale_for_testing": {
                "Selected_Reasons": [selected_case["reason"]],
                "Congenital_Heart_Disease_Type": selected_case.get("heart_disease_type", ""),
                "Other_Reason": selected_case.get("other_description", "")
            },

            "Prenatal_Testing": prenatal_testing,

            "Prior_Genetic_Testing": {
                "Has_Prior_Testing": has_prior_testing,
                "Test_Types": [selected_prior_test] if has_prior_testing else [],
                "Other_Description": ""
            },

            "Well_Described_Syndrome": syndrome_present,
            "Syndrome_Information": selected_syndrome,

            "WES_Reanalysis": {
                "New_Gene_Reported": new_gene,
                "New_Gene_Reported_Description": new_gene_description,
                "New_Symptom_Onset": new_symptom,
                "New_Symptom_Description": new_symptom_description
            },

            "Billing_Provider_Info": _provider_info("Billing_Provider_Name"),

            "Ordering_Provider_Info": _provider_info("Ordering_Provider_Name"),

            "Attestation": {
                "Physician_Signature": f"Dr. {fake.last_name()}",
                "Date": attestation_date.strftime("%Y-%m-%d")
            }
        }

        patients.append(patient)
        print(f"Generated WES patient {i+1}/{num_samples}")

    return patients



In [6]:
# Smoke-test the generator with a small batch. generate_synthetic_patients
# prints one progress line per patient; the print below then dumps the raw
# list of patient dicts.
patients_ex = generate_synthetic_patients(10)
print(patients_ex)

Generated WES patient 1/10
Generated WES patient 2/10
Generated WES patient 3/10
Generated WES patient 4/10
Generated WES patient 5/10
Generated WES patient 6/10
Generated WES patient 7/10
Generated WES patient 8/10
Generated WES patient 9/10
Generated WES patient 10/10
[{'Member_Information': {'Member_ID': '780268554', 'Last_Name': 'Kelly', 'First_Name': 'Tyler', 'DOB': '2014-09-06', 'Age': 10, 'Address': '32213 Ponce Harbors Apt. 403', 'City': 'Valdezstad', 'State': 'GU', 'Zip': '87087', 'Primary_Diagnosis': 'Congenital heart disease', 'Date_of_Service': '2021-09-21'}, 'Requested_Tests': [], 'Rationale_for_testing': {'Selected_Reasons': ['Congenital heart disease'], 'Congenital_Heart_Disease_Type': 'Small isolated ventricular septal defect', 'Other_Reason': ''}, 'Prenatal_Testing': False, 'Prior_Genetic_Testing': {'Has_Prior_Testing': False, 'Test_Types': [], 'Other_Description': ''}, 'Well_Described_Syndrome': False, 'Syndrome_Information': {}, 'WES_Reanalysis': {'New_Gene_Reported'

In [7]:
def form_to_free_text(patient_data):
    """Convert structured (possibly corrupted) WES form to natural free-text narrative."""

    def safe_get(d, key, default='unknown'):
        try:
            return d.get(key, default)
        except Exception:
            return default

    def safe_str(x):
        try:
            return str(x) if x is not None else 'unknown'
        except Exception:
            return 'unknown'

    def format_age_variants(age):
        age_str = safe_str(age)
        if not age_str.isdigit():
            return f"{age_str} years old"
        
        age_num = int(age_str)
        age_words = {1: "one", 2: "two", 3: "three", 4: "four", 5: "five", 
                     6: "six", 7: "seven", 8: "eight", 9: "nine", 10: "ten",
                     11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen", 15: "fifteen",
                     16: "sixteen", 17: "seventeen", 18: "eighteen"}
        
        variants = [
            f"{age_num} years old",
            f"aged {age_num}",
            f"a {age_num}-year-old",
            f"{age_num} yrs old",
            f"{age_words.get(age_num, age_num)} years of age" if age_num <= 15 else f"{age_num} years of age"
        ]
        return random.choice(variants)

    def format_date_variants(date_str):
        if not date_str or date_str == 'unknown':
            return date_str
        
        try:
            from datetime import datetime
            date_obj = datetime.strptime(date_str, "%Y-%m-%d")
            
            variants = [
                date_str,  # 2024-01-15
                date_obj.strftime("%b %d, %Y"),  # Jan 15, 2024
                date_obj.strftime("%B %d, %Y"),  # January 15, 2024
                date_obj.strftime("%m/%d/%Y"),   # 01/15/2024
                date_obj.strftime("%d-%b-%Y")
            ]
            return random.choice(variants)
        except:
            return date_str

    def format_service_date_variants(date_str):
        formatted_date = format_date_variants(date_str)
        variants = [
            f"On {formatted_date}",
            f"During the visit on {formatted_date}",
            f"As of {formatted_date}",
            f"On {formatted_date}",
            f"During {formatted_date}"
        ]
        return random.choice(variants)

    def paraphrase_diagnosis(diagnosis):
        paraphrases = {
            "Global developmental delay": [
                "concerns about developmental milestones",
                "delays in reaching expected milestones",
                "developmental challenges"
            ],
            "Developmental regression": [
                "loss of previously acquired skills",
                "regression in developmental progress",
                "backward developmental trajectory"
            ],
            "Epileptic encephalopathy": [
                "seizure-related neurological concerns",
                "epileptic activity affecting brain function",
                "seizure-associated brain dysfunction"
            ],
            "Seizures": [
                "neurological episodes",
                "epileptic events",
                "recurrent seizure activity"
            ],
            "Intellectual disability": [
                "cognitive developmental concerns",
                "learning and intellectual challenges",
                "significant cognitive limitations"
            ],
            "Delayed milestones": [
                "slower achievement of developmental markers",
                "concerns about developmental progress",
                "developmental milestone delays"
            ],
            "Multiple congenital anomalies": [
                "various birth-related abnormalities",
                "complex congenital presentation",
                "multiple structural abnormalities"
            ],
            "Dysmorphic features": [
                "unusual physical characteristics",
                "atypical facial or body features",
                "distinctive physical appearance"
            ],
            "Arthrogryposis multiplex congenita": [
                "multiple joint contractures from birth",
                "congenital joint stiffness",
                "restricted joint movement from birth"
            ],
            "Congenital contractures": [
                "joint stiffness present from birth",
                "limited joint mobility",
                "contracture deformities"
            ],
            "Fetal hydrops": [
                "fluid accumulation in fetal tissues",
                "abnormal fetal fluid retention",
                "fetal edema"
            ],
            "Nonimmune hydrops fetalis": [
                "fetal fluid accumulation of unknown cause",
                "non-antibody related fetal swelling",
                "unexplained fetal hydrops"
            ],
            "Congenital hypotonia": [
                "decreased muscle tone from birth",
                "floppy baby syndrome",
                "poor muscle strength in newborn"
            ],
            "Central hypotonia": [
                "brain-related muscle weakness",
                "neurological muscle tone issues",
                "central nervous system hypotonia"
            ],
            "Neonatal hypotonia": [
                "newborn muscle weakness",
                "poor muscle tone in infancy",
                "floppy infant presentation"
            ],
            "Mild ADHD": [
                "mild attention and hyperactivity concerns",
                "subtle focus and behavioral challenges",
                "minor attention difficulties"
            ],
            "Attention deficit disorder": [
                "focus and concentration issues",
                "attention-related challenges",
                "difficulty maintaining attention"
            ],
            "Expressive language delay": [
                "delayed speech development",
                "difficulty with verbal expression",
                "language output concerns"
            ],
            "Language disorder NOS": [
                "unspecified communication difficulties",
                "general language challenges",
                "speech and language concerns"
            ],
            "Tetralogy of Fallot": [
                "complex cyanotic heart defect",
                "four-part congenital heart condition",
                "cyanotic congenital heart disease"
            ],
            "Atrial septal defect": [
                "hole in heart's upper chambers",
                "opening between heart's atria",
                "atrial wall defect"
            ],
            "Heart defect with extracardiac anomaly": [
                "heart condition with additional abnormalities",
                "cardiac defect plus other anomalies",
                "complex multi-system condition"
            ],
            "Isolated ventricular septal defect": [
                "hole in heart's lower chambers",
                "opening between heart's ventricles",
                "ventricular wall defect"
            ],
            "Complex congenital heart disease": [
                "complicated heart structural abnormalities",
                "multiple cardiac defects",
                "severe congenital heart condition"
            ]
        }
    
        diagnosis_lower = diagnosis.lower()
        for key, alternatives in paraphrases.items():
            if key.lower() in diagnosis_lower:
                return random.choice(alternatives)
    
        return diagnosis.lower()

    def paraphrase_reason(reason):
        paraphrases = {
            "Unexplained congenital or neurodevelopmental disorder(s)": [
                "unexplained congenital abnormalities",
                "congenital issues requiring investigation",
                "unexplained developmental concerns from birth"
            ],
            "Epilepsy/seizure disorder": [
                "seizure-related medical concerns",
                "recurrent unprovoked seizures",
                "recurrent seizure activity"
            ],
            "Moderate to severe intellectual disability": [
                "significant cognitive developmental concerns",
                "substantial learning and intellectual challenges",
                "marked intellectual limitations"
            ],
            "Multiple genetic anomalies": [
                "complex genetic presentation",
                "multiple genetic abnormalities",
                "multiple genetic findings of uncertain inheritance"
            ],
            "Congenital heart disease": [
                "structural heart abnormalities from birth",
                "congenital cardiac defects",
                "heart malformations present at birth"
            ],            
            "Other": [
                "the patient presented with clinical features requiring genetic analysis",
                "clinical scenarios warranting genetic investigation"
            ]
        }

        reason_lower = reason.lower()
        for key, alternatives in paraphrases.items():
            if key.lower() in reason_lower:
                return random.choice(alternatives)
    
        return reason.lower()

    # Extract main sections
    member_info = patient_data.get('Member_Information', {})
    rationale = patient_data.get('Rationale_for_testing', {})
    prior_testing = patient_data.get('Prior_Genetic_Testing', {})
    reanalysis = patient_data.get('WES_Reanalysis', {})
    billing = patient_data.get('Billing_Provider_Info', {})
    ordering = patient_data.get('Ordering_Provider_Info', {})
    attestation = patient_data.get('Attestation', {})

    # Extract member information
    first_name = safe_get(member_info, 'First_Name')
    last_name = safe_get(member_info, 'Last_Name')
    full_name = f"{first_name} {last_name}"
    age = safe_get(member_info, 'Age')
    member_id = safe_get(member_info, 'Member_ID')
    dob = safe_get(member_info, 'DOB')
    diagnosis = safe_get(member_info, 'Primary_Diagnosis')
    service_date = safe_get(member_info, 'Date_of_Service')
    
    # Address components
    address = safe_get(member_info, 'Address')
    city = safe_get(member_info, 'City')
    state = safe_get(member_info, 'State')
    address_area = f"{city}, {state}" if city != 'unknown' and state != 'unknown' else address

    # Introduction templates
    intro_templates = [
        f"{full_name} ({member_id}) is currently {format_age_variants(age)}, born {format_date_variants(dob)} and lives in the area of {address_area}.",
        f"{full_name} ({member_id}) ({format_age_variants(age)}, born {format_date_variants(dob)} and reside in {address_area}) visited our clinic for medical evaluation.",
        f"{full_name} ({member_id}), born {format_date_variants(dob)}, is a resident near {address_area}."
    ]

    text = random.choice(intro_templates) + " "
    text += f"{format_service_date_variants(service_date)}, the main clinical concern was {paraphrase_diagnosis(diagnosis).lower()}. "

    # Rationale for testing
    selected_reasons = rationale.get('Selected_Reasons', [])
    if not isinstance(selected_reasons, list):
        selected_reasons = [safe_str(selected_reasons)]
    rationale_text = ', '.join([paraphrase_reason(r) for r in selected_reasons]) if selected_reasons else 'unknown reason'

    reason_templates = [
        f"Based on clinical presentation, genetic testing was recommended due to {rationale_text}.",
        f"The care team agreed that this genetic test could be helpful in light of {rationale_text.lower()}.",
        f"After discussion, genetic testing was recommended since {rationale_text.lower()}."
    ]
    text += random.choice(reason_templates) + " "

    # Other reason details
    other_reason = rationale.get('Other_Reason', '')
    if other_reason:
        text += f"Further context included: {safe_str(other_reason)}. "

    # Congenital heart disease type
    heart_disease_type = rationale.get('Congenital_Heart_Disease_Type', '')
    if heart_disease_type:
        text += f"The specific cardiac condition was {safe_str(heart_disease_type)}. "

    # Prenatal testing
    is_prenatal = patient_data.get('Prenatal_Testing', False)
    is_prenatal_flag = str(is_prenatal).strip().lower() in ['true', 'yes', '1']

    if is_prenatal_flag:
        text += "This case involves a prenatal evaluation. "

    # Prior genetic testing
    has_prior = prior_testing.get('Has_Prior_Testing', False)
    has_prior_flag = str(has_prior).strip().lower() in ['true', 'yes', '1']

    if has_prior_flag:
        prior_tests = prior_testing.get('Test_Types', [])
        if not isinstance(prior_tests, list):
            prior_tests = [safe_str(prior_tests)]
        prior_tests_str = ', '.join([safe_str(t) for t in prior_tests]) if prior_tests else 'various tests'
        text += f"There is a history of previous genetic tests, including {prior_tests_str} which showed negative results. "

        other_desc = prior_testing.get('Other_Description', '')
        if other_desc:
            text += f"Additional testing details: {safe_str(other_desc)}. "
    else:
        text += "No known prior genetic testing was performed. "

    # Well-described syndrome
    well_described = patient_data.get('Well_Described_Syndrome', False)
    if str(well_described).strip().lower() in ['true', 'yes', '1']:
        syndrome_info = patient_data.get('Syndrome_Information', {})
        if syndrome_info and isinstance(syndrome_info, dict):
            syndrome_name = syndrome_info.get('Syndrome_Name', 'unknown syndrome')
            text += f"The clinical picture fits a well-described syndrome, specifically {syndrome_name}. "
        else:
            text += "No syndrome symptoms are present. "

    # WES Reanalysis
    new_gene_reported = reanalysis.get('New_Gene_Reported', False)
    if str(new_gene_reported).strip().lower() in ['true', 'yes', '1']:
        gene_info = reanalysis.get('New_Gene_Reported_Description', '')
        if gene_info and isinstance(gene_info, dict):
            gene_name = gene_info.get('Gene', 'unknown gene')
            symptoms = ', '.join(gene_info.get('Associated_Symptoms', []))
            notes = gene_info.get('Phenotypic_Notes', '')
            text += f"Recently, there have been new findings related to the {gene_name} gene, which is associated with {symptoms}. Clinical notes indicate: {notes} "

    new_symptom_onset = reanalysis.get('New_Symptom_Onset', False)
    if str(new_symptom_onset).strip().lower() in ['true', 'yes', '1']:
        symptom_info = reanalysis.get('New_Symptom_Description', '')
        if symptom_info and isinstance(symptom_info, dict):
            gene_name = symptom_info.get('Gene', 'unknown gene')
            symptoms = ', '.join(symptom_info.get('Associated_Symptoms', []))
            notes = symptom_info.get('Phenotypic_Notes', '')
            text += f"Additionally, the patient has developed new symptoms that may be related to {gene_name} gene, which is associated with {symptoms}. Clinical notes indicate: {notes} "
    
    # Billing provider information
    if billing:
        billing_name = billing.get('Billing_Provider_Name', 'Unknown')
        billing_num = billing.get('Medicaid_Billing_Number', '????')
        billing_addr = billing.get('Street_Address', 'unknown address')
        billing_city = billing.get('City', '')
        billing_state = billing.get('State', '')
        billing_phone = billing.get('Phone_Number', 'N/A')
        billing_fax = billing.get('Fax_Number', 'N/A')
        billing_contact = billing.get('Contact_Name', 'the staff')
        
        billing_location = f"{billing_city}, {billing_state}" if billing_city and billing_state else billing_addr
        
        text += (
            f"For insurance and billing, {billing_name.replace('Dr. ', '')} "
            f"({billing_num}) was listed as the responsible provider. "
            f"Inquiries regarding coverage could be directed to the clinic's main office at {billing_location}, "
            f"or by phone at {billing_phone}. Relevant documents were typically exchanged via fax ({billing_fax}). "
            f"For administrative questions, {billing_contact} generally managed correspondence. "
        )

    # Ordering provider information
    if ordering:
        ordering_name = ordering.get('Ordering_Provider_Name', 'Unknown')
        ordering_num = ordering.get('Medicaid_Billing_Number', '????')
        ordering_addr = ordering.get('Street_Address', 'unknown address')
        ordering_city = ordering.get('City', '')
        ordering_state = ordering.get('State', '')
        ordering_phone = ordering.get('Phone_Number', 'N/A')
        ordering_fax = ordering.get('Fax_Number', 'N/A')
        ordering_contact = ordering.get('Contact_Name', 'the staff')
        
        ordering_location = f"{ordering_city}, {ordering_state}" if ordering_city and ordering_state else ordering_addr
        
        text += (
            f"The test itself was ordered by {ordering_name} ({ordering_num}) from the practice located at "
            f"{ordering_location}. "
            f"To reach {ordering_name.replace('Dr. ', '')}, contact was typically made through the clinic's main phone number ({ordering_phone}) or by sending documents to their fax ({ordering_fax}). "
            f"For administrative questions, {ordering_contact} generally managed correspondence. "
        )

    # Attestation
    if attestation:
        sign_name = attestation.get('Physician_Signature', 'the responsible physician')
        sign_date = attestation.get('Date', 'N/A')
        text += (
            f"All information and documentation pertaining to this case were finally reviewed and signed off by "
            f"{sign_name} on {format_date_variants(sign_date)}. "
        )

    return text


In [8]:
# Render every structured example patient form as a narrative free-text note,
# preserving the order of patients_ex.
patient_free_texts_ex = list(map(form_to_free_text, patients_ex))

In [9]:
# Persist the free-text narratives on their own.
with open('fin_patients_free_text_only_ex.json', 'w', encoding='utf-8') as f:
    json.dump(patient_free_texts_ex, f, indent=2, ensure_ascii=False)

# Combine each structured form with its free-text narrative in a single record.
# The narrative generated earlier is reused instead of calling
# form_to_free_text() again: that function chooses sentence templates with
# random.choice, so a second call could yield different wording than what was
# just written to the free-text-only file, leaving the two files inconsistent.
assert len(patient_free_texts_ex) == len(patients_ex), (
    "patient_free_texts_ex is out of sync with patients_ex; re-run the cell that builds it"
)

converted_patients = [
    {
        # IDs are 1-based and zero-padded to three digits: P001, P002, ...
        "patient_id": f"P{str(idx).zfill(3)}",
        "structured_form": patient,
        "free_text": free_text,
    }
    for idx, (patient, free_text) in enumerate(
        zip(patients_ex, patient_free_texts_ex), start=1
    )
]

with open('fin_wes_patients_with_freetext_ex.json', 'w', encoding='utf-8') as f:
    json.dump(converted_patients, f, indent=2, ensure_ascii=False)

# Show the first combined record as a quick sanity check.
print(json.dumps(converted_patients[0], indent=2, ensure_ascii=False))

{
  "patient_id": "P001",
  "structured_form": {
    "Member_Information": {
      "Member_ID": "780268554",
      "Last_Name": "Kelly",
      "First_Name": "Tyler",
      "DOB": "2014-09-06",
      "Age": 10,
      "Address": "32213 Ponce Harbors Apt. 403",
      "City": "Valdezstad",
      "State": "GU",
      "Zip": "87087",
      "Primary_Diagnosis": "Congenital heart disease",
      "Date_of_Service": "2021-09-21"
    },
    "Requested_Tests": [],
    "Rationale_for_testing": {
      "Selected_Reasons": [
        "Congenital heart disease"
      ],
      "Congenital_Heart_Disease_Type": "Small isolated ventricular septal defect",
      "Other_Reason": ""
    },
    "Prenatal_Testing": false,
    "Prior_Genetic_Testing": {
      "Has_Prior_Testing": false,
      "Test_Types": [],
      "Other_Description": ""
    },
    "Well_Described_Syndrome": false,
    "Syndrome_Information": {},
    "WES_Reanalysis": {
      "New_Gene_Reported": false,
      "New_Gene_Reported_Description": "

In [10]:
# Show the kernel's current working directory; relative file paths used in
# this notebook resolve against it.
working_dir = os.getcwd()
print(working_dir)

/home/cptaswadu/RESCUE-n8n/insurance/codes
