In [1]:
import torch
import re
import os
import json
import pickle
import openai
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm
from dotenv import load_dotenv

### Dotenv

In [2]:
# Load variables from a local .env file (python-dotenv) so the API key does
# not have to be hard-coded in the notebook.
load_dotenv()
# The key is read from the OPENAI_API environment variable; a KeyError here
# means that variable is not set in the environment or the .env file.
openai.api_key = os.environ['OPENAI_API']

### DataFrames

In [13]:
# Folder holding one trait table (CSV) per dataset.
root = "../../../data/OpenAI/DataFrames/"

# Every CSV is read with its first two rows as a two-level column header and
# its first column as the index, which is then labelled 'Species'.

# --- Andrei ---
file = "DF_Andrei.csv"
df_Andrei = (
    pd.read_csv(root + file, header=[0, 1], index_col=0)
    .rename_axis('Species', axis='index')
)
df_Andrei_species = [*df_Andrei.index]

# --- Daniel --- (the only table where rows with missing values are dropped)
file = "DF_Daniel.csv"
df_Daniel = (
    pd.read_csv(root + file, header=[0, 1], index_col=0)
    .rename_axis('Species', axis='index')
    .dropna()
)
df_Daniel_species = [*df_Daniel.index]

# --- Pierre ---
file = "DF_Pierre.csv"
df_Pierre = (
    pd.read_csv(root + file, header=[0, 1], index_col=0)
    .rename_axis('Species', axis='index')
)
df_Pierre_species = [*df_Pierre.index]

### Trait Dicts

In [24]:
# Folder holding the trait definitions (JSON) used to build the prompts.
folder_traits = "../../../data/OpenAI/Traits/"

# Trait name -> list of candidate values, consumed by the prompt-building loop.
# The file is read exactly once; re-reading the same file into the same
# variable has no effect.
# NOTE(review): only Andrei.json is loaded here. Presumably the Daniel and
# Pierre datasets need their own trait dictionaries under distinct variable
# names -- TODO confirm the intended file names before adding them.
with open(F"{folder_traits}Andrei.json", 'r') as f:
    caribbean_traits_dict = json.load(f)

### Functions

In [25]:
def combine_words_with_capital(string):
    """Collapse a phrase into a single capitalised-word identifier.

    Characters that are neither word characters nor whitespace are removed,
    the remainder is split on whitespace, each piece is passed through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased)
    and the pieces are concatenated without a separator.

    Args:
        string: The phrase to convert, e.g. a trait name.

    Returns:
        The concatenated string, e.g. ``"leaf composition"`` becomes
        ``"LeafComposition"``. An empty or whitespace-only input gives ``""``.
    """
    # Strip punctuation first so it cannot end up inside a word.
    cleaned = re.sub(r'[^\w\s]', '', string)
    return ''.join(map(str.capitalize, cleaned.split()))

### Text Data
#### Caribbean

In [4]:
# Folders holding the description snippets, split by granularity.
paragraph_folder = "../../../data/OpenAI/DescriptionSnippets/Paragraphs/"
sentence_folder = "../../../data/OpenAI/DescriptionSnippets/Sentences/"

# Collect every file whose name starts with "c" in each folder, in sorted
# order so the processing order does not depend on the file system.
caribbean_jsons_paras = sorted(glob.glob(F"{paragraph_folder}c*"))
caribbean_jsons_sents = sorted(glob.glob(F"{sentence_folder}c*"))

In [30]:
# Directory under which one sub-folder per species holds the prompts/results.
folder_prompts = "../../../data/OpenAI/PromptsResults/"

# Build one multiple-choice prompt per (species, trait) pair.
for json_file in caribbean_jsons_paras:

    # Best-effort load: a file that cannot be opened, is not valid JSON, or
    # does not hold a non-empty JSON object is reported and skipped, so one
    # bad file does not abort the run. Only these failure modes are caught;
    # anything else (including KeyboardInterrupt) propagates.
    try:
        with open(json_file, 'r') as f:
            caribbean_species_paragraph = json.load(f)

        # The species name is the first key of the JSON object.
        species = list(caribbean_species_paragraph.keys())[0]
    except (OSError, ValueError, AttributeError, IndexError) as err:
        print(F"Skipping {json_file}: {err!r}")
        continue

    # Join the paragraphs for the species into a single text string.
    text = " ".join(caribbean_species_paragraph[species])

    # Per-species output folder: species name with spaces -> underscores.
    folder_species = species.replace(' ', '_')
    os.makedirs(F"{folder_prompts}{folder_species}", exist_ok=True)

    # One prompt per trait; the progress bar is labelled with the species.
    for trait, trait_options in (pbar := tqdm(caribbean_traits_dict.items(), leave=False, position=0)):
        pbar.set_description(species)

        # The question names the trait of the current iteration, so each
        # trait gets its own question.
        question = F"Which of the following values correctly describe(s) the '{trait}' trait mentioned in the text? Please select all that apply."
        # assumes each trait maps to a list of option strings -- TODO confirm
        options = trait_options + ["None of the above", "Something else (please specify)"]
        user_content = F"Question: {question}  \n\nValues: {options}"

        # NOTE(review): the species description is sent with role
        # "assistant"; confirm this is intended rather than "user"/"system".
        messages = [
            {"role": "assistant", "content": text},
            {"role": "user", "content": user_content}
            ]
        # Call the ChatGPT API to generate a completion for the prompt
        # completion = openai.ChatCompletion.create(
        #     model = "gpt-3.5-turbo",
        #     messages = messages,
        # )

        # # Combine the words in the trait name with capital letters and use this as the file name
        # file_name = combine_words_with_capital(trait)
        # # Save the completion to a JSON file with the file name in the species directory
        # with open(F"{folder_prompts}{folder_species}/{file_name}.json", 'w') as fp:
        #     json.dump(completion, fp)
            


                                                                            