# In [1]:
import csv
import re
from tqdm import tqdm

# Values accepted in the payer_category column.
_PAYER_CATEGORIES = frozenset({'gross', 'cash', 'min', 'max', 'payer'})

# Optional code columns mapped to the pattern a non-empty value must match in
# full. Compiled once here because the validation loop runs once per CSV row.
# Insertion order is the order in which the columns are checked and reported.
_CODE_PATTERNS = {
    'rev_code': re.compile(r'[0-9]{4}'),
    'ms_drg': re.compile(r'[0-9]{3}'),
    'apr_drg': re.compile(r'[0-9]{3}(?:-[0-4])?'),
    'hcpcs_cpt': re.compile(r'[A-Z][0-9]{4}|[0-9]{5}|[0-9]{4}[A-Z]'),
    # One to four two-character modifiers separated by a literal pipe.
    # The pipe must be escaped as `\|`; `\\|` in a raw string means
    # "a backslash OR ...", which rejected values such as "25|59".
    'modifiers': re.compile(r'[A-Z0-9]{2}(?:\|[A-Z0-9]{2}){0,3}'),
    'apc': re.compile(r'[0-9]{4}'),
}

# Columns with no specific validation mentioned in the schema, and therefore
# not checked: local_code, code, eapg, alt_hcpcs_cpt, thru, icd, ndc,
# drug_hcpcs_multiplier, drug_quantity, drug_unit_of_measurement,
# drug_type_of_measurement, billing_class, setting, contracting_method,
# additional_generic_notes, additional_payer_specific_notes.


def _invalid_fields(row):
    """Return the labels of every failed check for one parsed CSV row.

    Args:
        row: Mapping of column name to string value (or None when the column
            is absent), as produced by ``csv.DictReader``.

    Returns:
        A list of labels, in check order, each naming one failed check.
        The list is empty when the row passes every check.
    """
    problems = []

    # hospital_id is mandatory: exactly six alphanumeric characters.
    hospital_id = row.get('hospital_id')
    if not hospital_id or len(hospital_id) != 6 or not hospital_id.isalnum():
        problems.append('hospital_id')

    # description is optional but limited to 2000 characters.
    description = row.get('description')
    if description and len(description) > 2000:
        problems.append('description')

    # Optional code columns: only validated when a value is present.
    # fullmatch is used so that a trailing newline cannot slip past the
    # pattern the way it can with match() and a `$` anchor.
    for column, pattern in _CODE_PATTERNS.items():
        value = row.get(column)
        if value and not pattern.fullmatch(value):
            problems.append(column)

    # payer_category is mandatory and must be one of the known values.
    if row.get('payer_category') not in _PAYER_CATEGORIES:
        problems.append('payer_category')

    # At least one of payer_name / plan_name must be filled in.
    if not row.get('payer_name') and not row.get('plan_name'):
        problems.append('payer_name and plan_name')

    # A charge (absolute or percent) is required unless the contracting
    # method is 'other'.
    standard_charge = row.get('standard_charge')
    standard_charge_percent = row.get('standard_charge_percent')
    if (not standard_charge and not standard_charge_percent
            and row.get('contracting_method') != 'other'):
        problems.append('standard_charge and standard_charge_percent')
    # The two length limits are checked independently so that an over-long
    # standard_charge does not hide an over-long standard_charge_percent.
    if standard_charge and len(standard_charge) > 14:
        problems.append('standard_charge length')
    if standard_charge_percent and len(standard_charge_percent) > 6:
        problems.append('standard_charge_percent length')

    return problems


def validate_csv_file(csv_file_path, encoding=None):
    """Check every row of a CSV file and print one line per failed check.

    The first line of the file is treated as the header. Data rows are
    numbered from 1 in the printed messages, and a progress counter is shown
    through ``tqdm`` while rows are read.

    Args:
        csv_file_path: Path of the CSV file to validate.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            uses the platform default encoding.

    Returns:
        None. Problems are reported on standard output as
        ``Invalid <check> in row <n>``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded line breaks.
    with open(csv_file_path, 'r', newline='', encoding=encoding) as file:
        reader = csv.DictReader(file)

        for row_number, row in tqdm(enumerate(reader, start=1)):
            for label in _invalid_fields(row):
                print(f"Invalid {label} in row {row_number}")

# Usage example
# Example run: validate one generated hospital price file.
# A raw string keeps the Windows backslashes literal without doubling them.
csv_file_path = r'G:\transparency-in-pricing\output_files\450419_harris-methodist-hospital-azle.csv'
validate_csv_file(csv_file_path=csv_file_path)


# Output: 6146983it [00:40, 151878.62it/s]
