In [1]:
import torch
import re
import os
import json
import pickle
import openai
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm
from dotenv import load_dotenv

### Dotenv

In [2]:
# Load environment variables via python-dotenv (typically from a local .env
# file), so the API key does not have to be hard-coded in the notebook.
load_dotenv()
# Indexing os.environ raises KeyError right here if OPENAI_API is not defined,
# rather than failing later on the first API request.
openai.api_key = os.environ['OPENAI_API']

### DataFrames

In [3]:
root = "../../../data/OpenAI/DataFrames/"


def _read_species_frame(csv_name):
    """Read one of the DF_*.csv files under `root` into a DataFrame.

    The first two rows are parsed as a two-level column header, the first
    column becomes the index, and that index axis is named 'Species'.
    """
    frame = pd.read_csv(F"{root}{csv_name}", header=[0, 1], index_col=0)
    return frame.rename_axis('Species', axis='index')


file = "DF_Andrei.csv"
df_Andrei = _read_species_frame(file)
df_Andrei_species = list(df_Andrei.index)

file = "DF_Daniel.csv"
# Unlike the other two frames, rows containing any missing value are dropped.
df_Daniel = _read_species_frame(file).dropna()
df_Daniel_species = list(df_Daniel.index)

file = "DF_Pierre.csv"
df_Pierre = _read_species_frame(file)
df_Pierre_species = list(df_Pierre.index)

### Trait Dicts

In [4]:
folder_traits = "../../../data/OpenAI/Traits/"

# Mapping of trait name -> list of possible values (see the displayed dict
# further down). JSON is UTF-8 by specification, so the encoding is stated
# explicitly instead of relying on the platform's locale default.
with open(F"{folder_traits}Andrei.json", 'r', encoding="utf-8") as f:
    caribbean_traits_dict = json.load(f)

### Text Data

In [5]:
paragraph_folder = "../../../data/OpenAI/DescriptionSnippets/Paragraphs/"
sentence_folder = "../../../data/OpenAI/DescriptionSnippets/Sentences/"

# Collect every entry whose name starts with 'c' in each folder, in sorted
# order so that positional indexing into the lists is reproducible.
caribbean_jsons_paras = sorted(glob.glob(F"{paragraph_folder}c*"))
caribbean_jsons_sents = sorted(glob.glob(F"{sentence_folder}c*"))

### Single Species Testing

In [6]:
# Position of the species used for testing within the sorted file lists.
# NOTE(review): this assumes the same index refers to the same species in both
# the paragraph and the sentence list -- confirm the two folders stay aligned.
TEST_SPECIES_INDEX = 22

# JSON is UTF-8 by specification; pass the encoding explicitly so the species
# descriptions decode identically regardless of the platform locale.
with open(caribbean_jsons_paras[TEST_SPECIES_INDEX], 'r', encoding="utf-8") as f:
    caribbean_species_para_test = json.load(f)

with open(caribbean_jsons_sents[TEST_SPECIES_INDEX], 'r', encoding="utf-8") as f:
    caribbean_species_sent_test = json.load(f)

In [7]:
# Use the first key of the sentence JSON as the species name.
species_names = list(caribbean_species_sent_test)
species = species_names[0]
print(species)

Hippomane mancinella


In [8]:
# Flatten each list of snippets into one space-separated string.
sentence_snippets = caribbean_species_sent_test[species]
text_sent = ' '.join(sentence_snippets)

paragraph_snippets = caribbean_species_para_test[species]
text_paras = ' '.join(paragraph_snippets)

# Check whether both granularities yield exactly the same text once joined.
text_paras == text_sent

True

### Prompting per Trait

In [9]:
def create_prompt(paragraph, traits, question):
    """Assemble the prompt text from a description, trait options and a question.

    Args:
        paragraph: Text placed under the "Text:" heading.
        traits: Value placed under the "Trait with possible options:" heading;
            it is interpolated with its default string formatting.
        question: Final line of the prompt.

    Returns:
        The prompt string. It starts with a newline, every non-blank line is
        prefixed with four spaces, and it ends with four spaces.
    """
    indent = "    "
    prompt_lines = [
        "",
        F"{indent}Text:",
        F"{indent}{paragraph}",
        "",
        F"{indent}Trait with possible options:",
        F"{indent}{traits}",
        "",
        F"{indent}{question}",
        indent,
    ]
    return "\n".join(prompt_lines)

def combine_words_with_capital(string):
    """Collapse a phrase into a single token with each word capitalized.

    Every character that is neither a word character (letters, digits,
    underscore) nor whitespace is removed. The remainder is split on
    whitespace, each piece is passed through ``str.capitalize`` (first
    character upper-cased, the rest lower-cased) and the pieces are
    concatenated without a separator.

    Example: "Leaf composition" -> "LeafComposition".
    """
    cleaned = re.sub(r'[^\w\s]', '', string)
    capitalized_words = []
    for token in cleaned.split():
        capitalized_words.append(token.capitalize())
    return ''.join(capitalized_words)


def query_ChatGPT(prompt):
    """Send `prompt` as a single user message to the gpt-3.5-turbo chat model.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns, unmodified.
    """
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )

In [10]:
# Keep only the first three traits (in the dict's existing order), copying
# each list of options. Note that this rebinds caribbean_traits_dict itself.
first_three_traits = list(caribbean_traits_dict.items())[:3]
caribbean_traits_dict = {trait: options[:] for trait, options in first_three_traits}
caribbean_traits_dict

{'Life form': ['liana', 'tree'],
 'Leaf position': ['alternate',
  'alternate, opposite',
  'opposite',
  'opposite, whorls of 3',
  'opposite, whorls of 3, alternate'],
 'Leaf composition': ['3 palmate',
  '3-5 palmate',
  '3-5 pinnate',
  '3-5 pinnate, entire',
  '5-11 pinnate',
  '5-9 pinnate',
  'bi-pinnate, 2 leaflets per jug',
  'bi-pinnate, 20-40 leaflets per jug',
  'bi-pinnate, 30-32 leaflets per jug',
  'entire',
  'pinnate, 4-6 leaflets',
  'pinnate, 6-8 leaflets']}

In [39]:
folder_prompts = "../../../data/OpenAI/PromptsResults/"
folder_species = species.replace(' ', '_')

# TODO: wrap everything below in a loop over all species; for now only the
# single test species selected above is processed.

# One results directory per species; exist_ok makes re-running the cell safe.
os.makedirs(F"{folder_prompts}{folder_species}", exist_ok=True)

# The description text is the same for every trait, so bind it once.
text = text_paras

for trait, trait_options in tqdm(caribbean_traits_dict.items()):

    ### PROMPT ###

    # Interpolate the current trait so each request asks about the trait whose
    # options are listed and under whose name the answer is stored.
    question = F"Which of the following values correctly describe(s) the '{trait}' trait mentioned in the text? Please select all that apply."

    # Known values for this trait plus two escape hatches for the model.
    options = trait_options + ["None of the above", "Something else (please specify)"]

    # `question` ends up holding the full user message (question + values).
    question = F"Question: {question}  \n\nValues: {options}"
    ### PROMPT ###

    # NOTE(review): the description is sent with the "assistant" role, i.e. as
    # if the model had written it -- confirm this is intended rather than
    # "user" or "system".
    messages = [
        {"role": "assistant", "content": text},
        {"role": "user", "content": question}
    ]

    # Generate a response
    completion = openai.ChatCompletion.create(
        model = "gpt-3.5-turbo",
        messages = messages,
    )

    # Store the raw response as <species folder>/<TraitName>.json.
    file_name = combine_words_with_capital(trait)
    with open(F"{folder_prompts}{folder_species}/{file_name}.json", 'w') as fp:
        json.dump(completion, fp)

100%|██████████| 3/3 [00:08<00:00,  2.86s/it]


In [38]:
# Show the description text and the current prompt, separated by a line break.
print(F"{text} \n {question}")

Spikes of small greenish flowers are followed by fruits, which are similar in appearance to an apple, are green or greenish-yellow when ripe. The manchineel is a handsome round-crowned tree that grows up to 12 metres (40 feet) in height with a 60-cm- (2-foot-) thick trunk. It has long-stalked, lustrous, leathery, elliptic yellow-green leaves. The sweet-scented fruits are borne singly or in pairs and range in colour from yellow to reddish. The fruit contains a hard stone that encloses six to nine seeds. A flowering, evergreen, and a round-crowned tree that can reach up to 50 feet in height , the Manchineel has a reddish-grey bark and a trunk that can be around two feet in diameter. It has long-stalked, shiny, and elliptical leaves that are yellow-green in colour and about 10 centimetres in length. The leaves are simple, alternately arranged, and have serrated edges. The fruit of the Manchineel resemble apples and are green or greenish-yellow when ripe. The hard stone inside the fruit co