In [3]:
import os
import subprocess
import dask.dataframe as dd
from dask.delayed import delayed
import pandas as pd

In [4]:
### JSON Parsing to convert Nested JSON into a structured format


#### ------------- This cell will take around 10 min to run ------------- ####

# JQ command to transform JSON to CSV
jq_command = """
gunzip -c 2024-05-01_anthem_index.json.gz |
jq --stream -r '
  select(length == 2 and (. | .[0][-1] == "description" or .[0][-1] == "location")) |
  {(.[0][-1]): .[1]} |
  [ .description, .location ] |
  select(length==2) |
  @csv' >> intermediate.csv
"""
# Execute the JQ command
subprocess.run(jq_command, shell=True, check=True)

CompletedProcess(args='\ngunzip -c 2024-05-01_anthem_index.json.gz |\njq --stream -r \'\n  select(length == 2 and (. | .[0][-1] == "description" or .[0][-1] == "location")) |\n  {(.[0][-1]): .[1]} |\n  [ .description, .location ] |\n  select(length==2) |\n  @csv\' >> intermediate.csv\n', returncode=0)

In [5]:
#### ------------ This cell will take around 3 min to run ---------------- #####


# Define the path to the generated CSV file
file_path = 'intermediate.csv'

# Check if the file exists
if os.path.exists(file_path):
    
    # Read the CSV file using Dask
    df = dd.read_csv(file_path, header=None, names=["Description", "Location"])
    
    # Shift the 'Location' column one row up
    df['ShiftedLocation'] = df['Location'].shift(-1)

    # Replace 'Location' with 'ShiftedLocation'
    df = df.drop('Location', axis=1).rename(columns={'ShiftedLocation': 'Location'})

    # Drop rows with missing values
    df = df.dropna()

    # Filter rows based on conditions
    df = df[df['Description'].str.lower().str.contains('new york') & df['Description'].str.lower().str.contains('ppo')]

    # Compute the result to trigger computation
    result = df.compute()

    # Save the result to a CSV file
    result.to_csv('Anthem_NY_PPO.csv', index=False)
    
    os.remove(file_path)

else:
    print("File does not exist. Please check the jq command for errors.")