In [18]:
import json
import glob
import os

In [5]:
def load_json_data(file_path):
    """
    Load data from a JSON file.

    This is a best-effort loader: problems are reported on stdout and an
    empty dict is returned instead of raising, so a caller looping over
    many files can skip the ones that fail.

    Args:
        file_path (str): Path to the JSON file

    Returns:
        dict: The decoded JSON document (expected to be a dict), or an
            empty dict if the file does not exist, is not valid UTF-8,
            or does not contain valid JSON.
    """
    try:
        # Be explicit about the encoding: without it, open() falls back to
        # the platform's locale encoding, which is not UTF-8 everywhere.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A file that cannot be decoded as UTF-8 is treated the same as
        # one whose text is not well-formed JSON.
        print(f"Error: File '{file_path}' contains invalid JSON.")
        return {}

In [14]:
def parse_upc_ndc_mapping(data):
    """
    Parse through the drug label JSON data and create a mapping of UPC to NDC.

    Only records whose 'openfda' section has both a non-empty 'upc' list and
    a non-empty 'product_ndc' list contribute to the result.

    The mapping is one-to-one, so it is lossy when the input is not:
      * if a record lists several product NDCs, the last one is kept;
      * if the same UPC appears in several records, the latest record wins.

    Args:
        data (dict): Dictionary containing drug label information under the
            'results' key. A missing or empty 'results' (for example the
            empty dict that load_json_data returns on failure) yields an
            empty mapping instead of raising KeyError.

    Returns:
        dict: Dictionary mapping UPCs to their corresponding NDCs
    """
    upc_ndc_map = {}
    for record in (data or {}).get('results') or []:
        # 'openfda' may be absent or null on a record; treat both as empty.
        openfda = record.get('openfda') or {}
        upcs = openfda.get('upc')
        ndcs = openfda.get('product_ndc')
        if not upcs or not ndcs:
            continue

        # Assigning every NDC in turn to the same key would leave only the
        # last one in place, so pick it directly.
        last_ndc = ndcs[-1]
        for upc in upcs:
            upc_ndc_map[upc] = last_ndc

    return upc_ndc_map

In [22]:
# Directory containing the "drug-label-*.json" files. Set the UPC_NDC_DIR
# environment variable to use a different location without editing this cell;
# the fallback below is the original hard-coded path.
json_directory = os.environ.get(
    "UPC_NDC_DIR",
    "C:\\Users\\andre\\Downloads\\upc_ndc",  # Change this to your directory path
)
# glob makes no ordering guarantee. Sort so the files are always processed in
# the same order, which keeps "later record wins" results reproducible.
json_files = sorted(glob.glob(os.path.join(json_directory, "drug-label-*.json")))

# Combined container: the 'results' lists of all files are concatenated here
data = {'results': []}

# Load and combine data from all JSON files
for json_file in json_files:
    print(f"Loading {json_file}...")
    file_data = load_json_data(json_file)
    if file_data and 'results' in file_data:
        data['results'].extend(file_data['results'])

# `data` always holds the 'results' key, so the dict itself is never falsy;
# the meaningful check is whether any records were actually collected.
if not data['results']:
    # exit() is a helper meant for the interactive shell; raise SystemExit
    # explicitly so execution of this cell stops right here.
    raise SystemExit("No data loaded. Exiting.")

mapping = parse_upc_ndc_mapping(data)

# Print a preview of the results (first entry only; the mapping can be large)
print("UPC to NDC Mapping:")
for upc, ndc in mapping.items():
    print(f"UPC: {upc} -> NDC: {ndc}")
    break

# Save the mapping to a text file, one "upc,ndc" pair per line
output_file = "upc_ndc_mapping.txt"
try:
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{upc},{ndc}\n" for upc, ndc in mapping.items())
    print(f"\nMapping saved to {output_file}")
except OSError:
    # IOError is an alias of OSError in Python 3
    print(f"Error: Unable to write to file '{output_file}'")

Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0001-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0002-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0003-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0004-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0005-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0006-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0007-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0008-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0009-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0010-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0011-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0012-of-0013.json...
Loading C:\Users\andre\Downloads\upc_ndc\drug-label-0013-of-0013.json...
UPC to NDC Mapping:
UPC: 0300090020019 -> NDC: 6729

In [23]:
# Number of distinct UPC keys in the mapping built in the previous cell
len(mapping)

34683