## Step 1. Import modules

In [9]:
import pandas as pd
import json
import os

## Step 2. Specify the file paths

In [10]:
# Input: directory that is searched (recursively) for JSON metadata files.
json_dir = 'opengeometadata'

# Input: CSV that maps reference URIs to their labels.
reference_uris_file_path = 'aardvark-profile/referenceURIs.csv'

# Input: CSV that maps OGM Aardvark fields to their labels.
full_schema_file_path = 'aardvark-profile/aardvark.csv'

# Output: path of the CSV file that will be written.
csv_file_path = 'opengeometadata.csv'

## Step 3. Define functions

In [11]:
def _record_to_row(json_data, schema_fields, reference_uri_dict):
    """Flatten one parsed JSON record into a ``{column label: value}`` dict.

    Args:
        json_data: The decoded content of one JSON file.
        schema_fields: Iterable of ``(label, field_name, field_type)`` tuples
            taken from the schema CSV.
        reference_uri_dict: Mapping of reference URI -> column label.

    Returns:
        A dict keyed by column label. Only fields present in ``json_data``
        produce an entry.
    """
    csv_row = {}
    for label, field_name, field_type in schema_fields:
        if field_name not in json_data:
            continue
        value = json_data[field_name]

        if field_name == "dct_references_s":
            # References are stored as a JSON object serialized into a string.
            # An already-decoded object or a null value is tolerated too, so a
            # single unusual record does not abort the whole export.
            references = json.loads(value) if isinstance(value, str) else value
            for uri, url in (references or {}).items():
                if uri in reference_uri_dict:
                    csv_row[reference_uri_dict[uri]] = url
        elif field_type == "Array" and isinstance(value, (list, tuple)):
            # str() each item: str.join raises TypeError on non-string items
            # such as integers.
            csv_row[label] = '|'.join(str(item) for item in value)
        else:
            # Scalars (including a bare string found in an "Array" field) are
            # kept whole; joining a string would split it into characters.
            csv_row[label] = value
    return csv_row


def convert_json_to_csv(json_dir, reference_uris_file_path, full_schema_file_path, csv_file_path):
    """Collect every ``*.json`` file under ``json_dir`` into a single CSV file.

    Each JSON file becomes one row. Columns are the schema labels, in schema
    order, followed by any reference labels that the schema does not already
    list, so the links extracted from ``dct_references_s`` are kept in the
    output.

    Args:
        json_dir: Directory that is walked recursively for ``.json`` files.
        reference_uris_file_path: CSV with ``LABEL`` and ``URI`` columns.
        full_schema_file_path: CSV with ``Label``, ``Field Name`` and
            ``Field Type`` columns.
        csv_file_path: Destination path of the generated CSV file.

    Returns:
        None. The result is written to ``csv_file_path``.

    Raises:
        json.JSONDecodeError: If a file, or a ``dct_references_s`` string, is
            not valid JSON.
    """
    # Map each reference URI directly to its label.
    reference_uris_data = pd.read_csv(reference_uris_file_path)
    reference_uri_dict = dict(zip(reference_uris_data['URI'], reference_uris_data['LABEL']))

    # Read the schema once; it does not change from file to file.
    full_schema_data = pd.read_csv(full_schema_file_path)
    schema_fields = list(zip(
        full_schema_data['Label'],
        full_schema_data['Field Name'],
        full_schema_data['Field Type'],
    ))

    csv_data_list = []
    for root, dirs, files in os.walk(json_dir):
        # Sorting in place makes os.walk descend in a stable order, so the row
        # order does not depend on the filesystem.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith(".json"):
                continue
            file_path = os.path.join(root, filename)
            # JSON text is UTF-8; do not rely on the platform default encoding.
            with open(file_path, 'r', encoding='utf-8') as json_file:
                json_data = json.load(json_file)
            csv_data_list.append(_record_to_row(json_data, schema_fields, reference_uri_dict))

    # Schema labels first, then reference labels not already among them.
    # Restricting the columns to the schema labels alone would drop the
    # reference links collected above.
    output_columns = list(full_schema_data['Label'])
    for reference_label in reference_uris_data['LABEL'].dropna():
        if reference_label not in output_columns:
            output_columns.append(reference_label)

    csv_df = pd.DataFrame(csv_data_list, columns=output_columns)
    csv_df.to_csv(csv_file_path, index=False, encoding='utf-8')

## Step 4. Execute the function

In [12]:
# Merge all JSON records into one CSV, using the paths configured in Step 2
convert_json_to_csv(
    json_dir=json_dir,
    reference_uris_file_path=reference_uris_file_path,
    full_schema_file_path=full_schema_file_path,
    csv_file_path=csv_file_path,
)

# Report where the output was written
print(f'CSV file generated at: {csv_file_path}')

CSV file generated at: opengeometadata.csv
