# Import Libraries

In [56]:
import ijson
import re
import csv
import time

In [57]:
# Path of the Anthem index JSON that the extraction cell streams with ijson.
INPUT_FILE = "../2024-05-01_anthem_index.json"
# CSV that receives the selected in-network file URLs, one URL per row.
OUTPUT_FILE = "ANTHEM_PPO_NY_URLS.csv"

In [58]:
def check_if_plan_is_anthem_ppo(plan_name: str) -> bool:
    """Tell whether a plan name mentions ANTHEM followed later by PPO.

    The search is case-insensitive, and any characters (including line
    breaks) may sit between the two words.

    Args:
        plan_name: Plan name to inspect.

    Returns:
        True when "ANTHEM" occurs and "PPO" occurs somewhere after it,
        False otherwise.
    """
    anthem_then_ppo = re.search(
        r'ANTHEM.*PPO', plan_name, flags=re.DOTALL | re.IGNORECASE
    )
    return anthem_then_ppo is not None

In [59]:
def filter_url_based_on_ANTHEM_NY(url: str, code_empire: str = "254_39") -> bool:
    """Tell whether a file URL carries the Empire (Anthem NY) code.

    Only the part of the URL before the first ``.gz?`` is inspected, so a code
    that appears solely in the query string does not count.
    Example URL:
    https://anthembcbsky.mrf.bcbs.com/2024-05_254_39B0_in-network-rates_1_of_10.json.gz?

    Args:
        url: Location of an in-network file.
        code_empire: Substring identifying "Empire BlueCross BlueShield"
            files in the URL.

    Returns:
        True when the URL contains ``.gz?`` and ``code_empire`` occurs before
        it; False otherwise, including when ``.gz?`` is absent.
    """
    file_path, separator, _query = url.partition('.gz?')

    # Without the ".gz?" marker the URL is not in the expected shape
    if not separator:
        return False

    return code_empire in file_path

In [60]:
# Wall-clock start of the run; the last cell subtracts it to report elapsed time.
start_time = time.time()

In [61]:
# Stream the index and write the URL of every NY (Empire) in-network file that
# belongs to a reporting structure with at least one Anthem PPO plan.
# The input is opened in binary mode because ijson parses bytes; passing it a
# text-mode file is deprecated.
with open(INPUT_FILE, 'rb') as file, open(OUTPUT_FILE, 'w', newline='') as csvfile:

    # Output file: single-column CSV with a header row
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["URL"])

    # Iterating over each reporting structure, one at a time
    reporting_structure = ijson.items(file, 'reporting_structure.item')
    for report in reporting_structure:

        # Analyze for ANTHEM PPO: a single matching plan name is enough, so
        # stop scanning the plans at the first hit.
        has_anthem_ppo = False
        for reporting_plans in report.get('reporting_plans') or []:
            plan_name = reporting_plans.get('plan_name')
            if plan_name and check_if_plan_is_anthem_ppo(plan_name):
                has_anthem_ppo = True
                break

        if not has_anthem_ppo:
            continue

        # The structure has an ANTHEM PPO plan: search its files for NY.
        # !! ANTHEM in NY is called "Empire BlueCross BlueShield"
        # Need to find the in_network files for this particular name
        # "Empire BlueCross BlueShield" => code 254 39__
        #
        # A reporting structure may have no 'in_network_files' entry; treat a
        # missing or null value as "no files" instead of iterating over None.
        for network_file in report.get('in_network_files') or []:

            # Keep only the URLs that carry the Empire (NY) code
            url_location = network_file.get('location')
            if url_location and filter_url_based_on_ANTHEM_NY(url_location):
                csv_writer.writerow([url_location])

In [62]:
# End timing the execution (wall-clock, same clock as start_time)
end_time = time.time()

# Calculate and print the total execution time, in seconds with two decimals
execution_time = end_time - start_time
print(f"Total execution time: {execution_time:.2f} seconds")

Total execution time: 46.84 seconds
