In [56]:
import json
import numpy as np
import pandas as pd
import glob
import re
from tqdm import tqdm


In [55]:
def remove_newlines(d: dict) -> dict:
    """
    Removes newline characters from string values in a dictionary, and converts any 'None of the above' values to a list
    with a single element 'None of the above'. Also strips square brackets from values that are lists and splits them
    into separate string elements.
    Args:
        d: The input dictionary to process.

    Returns:
        The processed dictionary with newlines removed and 'None of the above' values as lists.

    """
    for k, v in d.items():
        d[k] = v.replace('\n', '').strip()
        if d[k] == 'None of the above.':
            d[k] = ['None of the above']
        else:
            d[k] = [val.strip() for val in d[k].strip('[]').split(',')]
    return d


def strip_quotes(d: dict) -> dict:
    """
    Removes quotes around string values in a dictionary, and converts lists of quoted string values into lists of
    unquoted string values.
    Args:
        d: The input dictionary to process.

    Returns:
        The processed dictionary with quotes removed from string values and lists of unquoted string values.
    """
    new_dict = {}
    for k, v in d.items():
        if isinstance(v, list):
            new_list = []
            for item in v:
                if isinstance(item, str):
                    new_list.append(item.strip().strip("'"))
                else:
                    new_list.append(item)
            new_dict[k] = new_list
        else:
            new_dict[k] = v.strip().strip("'")
    return new_dict

### Open Data

In [2]:
folder_traits = "../../../data/OpenAI/Traits/"

with open(F"{folder_traits}Andrei.json", 'r') as f:
  caribbean_traits_dict = json.load(f)

In [48]:
folder_prompts = "../../../data/OpenAI/PromptsResults/"

species_folders = glob.glob(F"{folder_prompts}*")
species_folders[0:3]

['../../../data/OpenAI/PromptsResults/Amyris_ignea',
 '../../../data/OpenAI/PromptsResults/Avicennia_germinans',
 '../../../data/OpenAI/PromptsResults/Bursera_tomentosa']

In [54]:
prompt_results_dict = {}

for idx, species_folder in enumerate(species_folders):
    species_name = species_folder[36:60].replace('_', ' ')
    
    json_list = glob.glob(F"{species_folder}/*")

    for json_file in (pbar := tqdm(json_list, leave=False, position=0)):
        pbar.set_description(f"{idx}: {species_name}")

        trait_dict = {}

        # Trait name
        trait = json_file[56:-5]

        # Try to read in the JSON file
        with open(json_file, 'r') as f:
            prompt_result = json.load(f)

        trait_dict[trait] = prompt_result['choices'][0]['message']['content']

    prompt_results_dict[species_name] = trait_dict

Amyris ignea
Avicennia germinans
Bursera tomentosa
Bursera simaruba
Bourreria succulenta


In [4]:
avicennia_json_list = glob.glob(F"{species_folders[1]}/*")
avicennia_json_list[0:4]

['../../../data/OpenAI/PromptsResults/Avicennia_germinans/PetalsCorollaShape.json',
 '../../../data/OpenAI/PromptsResults/Avicennia_germinans/PetalsCorollaColour.json',
 '../../../data/OpenAI/PromptsResults/Avicennia_germinans/SepalsCalyxShape.json',
 '../../../data/OpenAI/PromptsResults/Avicennia_germinans/LeafPosition.json']

In [43]:
prompt_results_dict = {}

for json_file in avicennia_json_list:

    # Trait name
    trait = json_file[56:-5]

    # Try to read in the JSON file
    with open(json_file, 'r') as f:
        prompt_result = json.load(f)

    prompt_results_dict[trait] = prompt_result['choices'][0]['message']['content']