## Step 1. Import Modules

In [5]:
import pandas as pd
import json
import os

## Step 2. Specify the file paths

In [6]:
# --- Lookup tables -----------------------------------------------------
# CSV that pairs each reference label with its URI
reference_uris_file_path = '../aardvark-profile/referenceURIs.csv'
# CSV that pairs each OGM Aardvark field with its label
full_schema_file_path = '../aardvark-profile/aardvark.csv'

# --- Input / output ----------------------------------------------------
# The filled-in CSV to convert; change this to the name of your own file
csv_file_path = 'template-filled.csv'
# Directory that receives the generated JSON files
output_dir = 'json_output'

## Step 3. Define the function

In [7]:
def convert_csv_to_json(csv_file_path, reference_uris_file_path, full_schema_file_path, output_dir):
    """Write one JSON file per row of a metadata CSV.

    Args:
        csv_file_path: CSV holding one record per row. Its column headers are
            matched against the schema's ``Label`` values and against the
            reference ``LABEL`` values.
        reference_uris_file_path: CSV with ``LABEL`` and ``URI`` columns. For
            every label that has a non-empty cell in a record, the cell value
            is stored under the matching URI inside ``dct_references_s``.
        full_schema_file_path: CSV with ``Label``, ``Field Name`` and
            ``Field Type`` columns describing how CSV columns map to JSON keys.
        output_dir: Directory that receives the JSON files; it is created if
            it does not exist.

    Behavior:
        * Empty cells are omitted from the output.
        * Fields whose ``Field Type`` is ``"Array"`` become lists. String
          cells are split on ``|``; any other cell (for example a number that
          pandas parsed from a numeric column) becomes a one-element list.
        * ``dct_references_s`` is written as a JSON-encoded string.
        * ``gbl_mdVersion_s`` is always set to ``"Aardvark"``.
        * Each file is named ``<ID>.json``; when there is no ``ID`` column or
          the cell is empty, the row index is used instead.

    Raises:
        KeyError: If a required column is missing from the reference or
            schema CSV.
    """
    # Load the CSV data
    csv_data = pd.read_csv(csv_file_path)
    # Load the reference URIs data
    reference_uris_data = pd.read_csv(reference_uris_file_path)
    reference_uri_dict = dict(zip(reference_uris_data['LABEL'], reference_uris_data['URI']))
    # Load the full schema data
    full_schema_data = pd.read_csv(full_schema_file_path)
    # The schema is identical for every record, so pull out the three columns
    # once instead of re-iterating the DataFrame for each CSV row.
    schema_fields = list(zip(
        full_schema_data['Label'],
        full_schema_data['Field Name'],
        full_schema_data['Field Type'],
    ))

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    def to_native(val):
        """Unwrap numpy scalars so the json module can serialize them."""
        # numpy scalar types expose .item(); str/int/float/bool do not.
        return val.item() if hasattr(val, 'item') else val

    def split_multivalues(val):
        """Return the list form of a cell destined for an Array field."""
        if isinstance(val, str):
            # "a".split('|') is already ['a'], so no separate no-pipe branch.
            return val.split('|')
        # Non-string cells (e.g. an integer year) cannot contain '|';
        # wrap them instead of attempting a substring test that would raise.
        return [to_native(val)]

    def construct_json_data(row):
        """Build the JSON-ready dict for a single CSV row."""
        json_data = {}
        for label, field_name, field_type in schema_fields:
            if field_name == "dct_references_s":  # Handle references separately
                references = {
                    uri: to_native(row[ref_label])
                    for ref_label, uri in reference_uri_dict.items()
                    if pd.notna(row.get(ref_label))
                }
                if references:
                    json_data[field_name] = json.dumps(references)
                continue

            value = row.get(label)
            if pd.notna(value):
                if field_type == "Array":
                    json_data[field_name] = split_multivalues(value)
                else:
                    json_data[field_name] = to_native(value)

        json_data["gbl_mdVersion_s"] = "Aardvark"
        return json_data

    # Iterate over each row in the CSV and generate JSON files
    for index, row in csv_data.iterrows():
        json_data = construct_json_data(row)

        # Name the file after the record ID; fall back to the row index when
        # the column is absent or the cell is blank, so ID-less rows do not
        # all collapse into a single "nan.json".
        record_id = row.get('ID', index)
        if pd.isna(record_id):
            record_id = index
        file_path = os.path.join(output_dir, f"{record_id}.json")

        # Write the JSON data to a file
        with open(file_path, 'w', encoding='utf-8') as json_file:
            json.dump(json_data, json_file, indent=4)

## Step 4. Run the script

In [8]:
# Run the conversion: every row of the input CSV becomes its own JSON file
convert_csv_to_json(
    csv_file_path=csv_file_path,
    reference_uris_file_path=reference_uris_file_path,
    full_schema_file_path=full_schema_file_path,
    output_dir=output_dir,
)

# Report where the generated files were written
print(f'JSON files generated in directory: {output_dir}')

JSON files generated in directory: json_output
