In [1]:
import torch
import re
import os
import json
import pickle
import openai
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm
import ast
from dotenv import load_dotenv
import time

In [2]:
# Load environment variables via python-dotenv (presumably from a local .env
# file — confirm its location), then hand the key to the OpenAI client.
# Order matters: the lookup below only sees values that load_dotenv() has
# already placed in the environment.
load_dotenv()
# Indexing os.environ raises KeyError if 'OPENAI_API' is not set, so a missing
# key fails here rather than at the first API call.
openai.api_key = os.environ['OPENAI_API']

In [15]:
def extract_present_traits(species, df):
    """Return the column labels marked with 1 for one species, plus prompt stubs.

    Parameters
    ----------
    species : label
        Row label looked up with ``df.loc``.
    df : pandas.DataFrame
        Table whose column labels are 2-tuples (each label is unpacked into
        two parts below).

    Returns
    -------
    tuple[list, list]
        ``(labels, stubs)`` where ``labels`` are the column labels whose value
        in the species row equals 1, in column order, and ``stubs`` holds the
        first element of each label followed by a colon.
    """
    row = df.loc[species]

    # Blank out everything that is not exactly 1, then drop those blanks so
    # only the "present" columns remain.
    only_ones = row.where(row == 1)
    present_labels = list(only_ones.dropna().index)

    stubs = [F"{entity}:" for entity, _value in present_labels]
    return present_labels, stubs

def create_trait_dict(multi_index):
    """Group second-level labels under their first-level label.

    Parameters
    ----------
    multi_index : iterable
        Iterable of indexable items; element 0 of each item is used as the
        key and element 1 as the value.

    Returns
    -------
    dict
        Maps each key to the list of its values, in the order encountered.
        Keys appear in first-seen order.
    """
    grouped = {}
    for entry in multi_index:
        # setdefault creates the list the first time a key is seen.
        grouped.setdefault(entry[0], []).append(entry[1])
    return grouped

### Trait Dicts

In [4]:
# Directory holding the trait-definition JSON files.
folder_traits = "../../../data/OpenAI/Traits/"

# Read the trait dictionary once. Its keys are used later as the list of trait
# names that go into the prompts.
# NOTE(review): only Andrei.json is loaded. If separate trait files exist for
# the other annotators (cf. the Daniel/Pierre DataFrames), they are not read
# here — confirm whether that is intended.
with open(F"{folder_traits}Andrei.json", 'r') as f:
    caribbean_traits_dict = json.load(f)

### DataFrames

In [6]:
# Directory holding one CSV per annotator.
root = "../../../data/OpenAI/DataFrames/"

# Every CSV is read with its first two rows as a two-level column header and
# its first column as the row index; the row axis is then named 'Species'.

file = "DF_Andrei.csv"
df_Andrei = (
    pd.read_csv(F"{root}{file}", header=[0, 1], index_col=0)
    .rename_axis('Species', axis='index')
)
df_Andrei_species = list(df_Andrei.index)

# Daniel's table is the only one that additionally drops rows containing
# missing values.
file = "DF_Daniel.csv"
df_Daniel = (
    pd.read_csv(F"{root}{file}", header=[0, 1], index_col=0)
    .rename_axis('Species', axis='index')
    .dropna()
)
df_Daniel_species = list(df_Daniel.index)

file = "DF_Pierre.csv"
df_Pierre = (
    pd.read_csv(F"{root}{file}", header=[0, 1], index_col=0)
    .rename_axis('Species', axis='index')
)
df_Pierre_species = list(df_Pierre.index)

In [7]:
# Trait names come from the keys of the trait dictionary, in insertion order.
traits = list(caribbean_traits_dict)
# Species to query: the row labels of the Andrei table (kept as a pandas Index).
species_lst = df_Andrei.index

In [7]:
# Define the path to the directory where the prompts and results will be saved
folder_prompts = "../../../data/OpenAI/PromptsResults/ZeroShot/"

# Create the output directory up front so the first write cannot fail on a
# missing folder.
os.makedirs(folder_prompts, exist_ok=True)

# Only the first five species are queried in this cell.
species_subset = species_lst[0:5]

# Loop over species. `total` is passed explicitly because enumerate() has no
# length, so tqdm could not otherwise show progress out of the total.
for idx, species in (pbar := tqdm(enumerate(species_subset), total=len(species_subset), leave=False, position=0)):
    pbar.set_description(f"{idx}: {species}")

    # Replace spaces in the species name with underscores
    file_name = species.replace(' ', '_')
    result_path = F"{folder_prompts}{file_name}.json"

    # Skip species that already have a saved response, so the loop can be
    # resumed after an interruption (e.g. an OpenAI outage) without paying for
    # the same request twice. Checked before any prompt is built.
    if os.path.exists(result_path):
        continue

    # Create the question for the ChatGPT prompt
    question = F"Can you create semantic triples for the following traits {traits}, for the species '{species}' and return the answer as a list of of tuples for easy parsing?"
    user_content = F"{question}"

    # Create the messages to send to the ChatGPT API
    messages = [
        {"role": "user", "content": user_content}
        ]
    # Call the ChatGPT API to generate a completion for the prompt
    completion = openai.ChatCompletion.create(
        model = "gpt-3.5-turbo",
        messages = messages,
    )

    # Save the completion immediately. It is written to a temporary file and
    # then renamed, so an interrupted write never leaves a truncated
    # '<species>.json' that the existence check above would treat as done.
    tmp_path = F"{result_path}.tmp"
    with open(tmp_path, 'w') as fp:
        json.dump(completion, fp)
    os.replace(tmp_path, result_path)

    # Pause between requests to lower pressure on the API. Done after saving
    # so an interrupt during the pause cannot lose a response.
    time.sleep(2)
            

                                                    

In [9]:
# Build a single example prompt for the first species in the list.
species = species_lst[0]

# NOTE(review): the wording refers to "the following Python dictionary", but
# nothing is appended after the question — user_content is the question alone.
question = (
    F"Can you create semantic triples for the following traits {traits}, "
    F"for the species '{species}' based on the following Python dictionary "
    "and return the answer as a list of of tuples for easy parsing?"
)
user_content = F"{question}"

In [11]:
# Show the fully rendered prompt text for inspection.
print(user_content)

Can you create semantic triples for the following traits ['Life form', 'Leaf position', 'Leaf composition', 'Leaf shape', 'Leaf margin', 'Leaf upper side', 'Leaf lower side', 'Leaf glands', 'Leaf rachis', 'Thorns/spines', 'Stipules', 'Inflorescence type', 'Sepals / calyx shape', 'Sepals / calyx numer', 'Petals / corolla shape', 'Petals / corolla number', 'Petals / corolla colour', 'Stamen shape', 'Stamen number', 'Fruit type', 'Fruit shape', 'Fruit colour', 'Aril colour', 'Seed colour'], for the species 'Avicennia germinans' based on the following Python dictionary and return the answer as a list of of tuples for easy parsing?


In [12]:
# Ground-truth traits for the current species, computed inline with the same
# steps as extract_present_traits(species, df_Andrei).
s = df_Andrei.loc[species]

# Keep only the column labels whose value in this row equals 1.
GT_traits = list(
    s.where(s == 1)
    .dropna()
    .index
)

# One "<trait name>:" stub per present (trait, value) label.
empty_traits = [F"{entity}:" for entity, _value in GT_traits]

In [18]:
# Map each top-level column label (trait) to the list of its second-level
# labels (options). Iterating a DataFrame yields its column labels, so passing
# the columns explicitly produces the same mapping and matches the helper's
# `multi_index` parameter.
trait_dict = create_trait_dict(multi_index=df_Andrei.columns)

In [73]:
# Pick a single trait by its position in the dictionary (position 20; the
# printed prompt in this notebook shows it resolving to 'Fruit shape') and
# fetch the options recorded for it.
trait = list(trait_dict)[20]
options = trait_dict[trait]

In [76]:
# Single-trait prompt: list the candidate options and ask which apply.
# The pieces below concatenate to exactly the same text, including the
# trailing newline, as a triple-quoted template would.
question = (
    F"For the species '{species}' which of the following options is applicable for the trait '{trait}':\n"
    F"{options}\n"
    "\n"
    "Return 'NA' if none of the options are apliccable.\n"
    "Please return you answer as a Python tuple list.\n"
)

In [77]:
# Show the rendered single-trait prompt before sending it.
print(question)

For the species 'Avicennia germinans' which of the following options is applicable for the trait 'Fruit shape':
['2-locular, flattened, globose', '3-5-locular, globose', 'elongate', 'elongate, constricted between seeds', 'elongate, slightly constricted between seeds', 'elongate, strongly constricted between seeds', 'flattened obovoid', 'flattened ovate', 'flattened, elongate, twisted', 'flattened, thick, elongate, strongly curved', 'globose', 'globose, 3-angulated', 'linear', 'not flattened, elongate, slightly curved', 'ovoid', 'pyriform, ovoid', 'slightly flattened, elongate, curved']

Return 'NA' if none of the options are apliccable.
Please return you answer as a Python tuple list.



In [78]:
# Single-turn conversation: the question is sent as the only user message.
messages = [dict(role="user", content=question)]

# Call the ChatGPT API to generate a completion for the prompt
completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

In [79]:
# Display the raw API response object as the cell output.
completion

<OpenAIObject chat.completion id=chatcmpl-6yI43eNqTZHlKNaXqo9W76xB0IvlC at 0x7fd771860090> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The applicable option for the trait 'Fruit shape' in the species 'Avicennia germinans' is \"elongate, slightly constricted between seeds\".\n\nTherefore, the Python tuple list is:\n[('Fruit shape', 'elongate, slightly constricted between seeds')]",
        "role": "assistant"
      }
    }
  ],
  "created": 1679827139,
  "id": "chatcmpl-6yI43eNqTZHlKNaXqo9W76xB0IvlC",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 58,
    "prompt_tokens": 203,
    "total_tokens": 261
  }
}