In [39]:
import torch
import re
import os
import json
import pickle
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv

### Dotenv

In [2]:
# Run python-dotenv's loader, then read the OpenAI key from the OPENAI_API
# environment variable. os.environ[...] raises KeyError if it is not set.
load_dotenv()
openai.api_key = os.environ['OPENAI_API']

### DataFrames

In [40]:
# Folder containing the trait-matrix CSV files.
root = "../../../data/OpenAI/DataFrames/"

# Every CSV is read with a two-row column header and its first column as the
# index, which is then named 'Species'.
file = "DF_Andrei.csv"
df_Andrei = (
    pd.read_csv(root + file, header=[0, 1], index_col=0)
    .rename_axis(index='Species')
)
df_Andrei_species = list(df_Andrei.index)


# Only this frame additionally drops rows that contain missing values.
file = "DF_Daniel.csv"
df_Daniel = (
    pd.read_csv(root + file, header=[0, 1], index_col=0)
    .rename_axis(index='Species')
    .dropna()
)
df_Daniel_species = list(df_Daniel.index)

file = "DF_Pierre.csv"
df_Pierre = (
    pd.read_csv(root + file, header=[0, 1], index_col=0)
    .rename_axis(index='Species')
)
df_Pierre_species = list(df_Pierre.index)

In [48]:
# Rebuild the species list from df_Andrei's index. The DataFrame-loading cell
# already performs this exact assignment, so this cell is redundant after it.
df_Andrei_species = list(df_Andrei.index)

In [131]:
def extract_present_traits(species, df):
    """Collect the traits whose cell equals 1 for a single species.

    Args:
        species: Index label of the row to read from ``df``.
        df: Trait table whose column labels unpack into two parts,
            e.g. ``(entity, value)`` pairs from a two-level column header.

    Returns:
        A ``(GT_traits, empty_traits)`` tuple: the list of column labels whose
        cell equals 1 in the selected row, and a list with one
        ``"<entity>:"`` string per such label, in the same order.
    """
    row = df.loc[species]
    is_present = row == 1
    # Blank out everything that is not 1, then discard the blanked entries.
    present_only = row.where(is_present).dropna()
    GT_traits = list(present_only.index)

    empty_traits = []
    for entity, _value in GT_traits:
        empty_traits.append(F"{entity}:")

    return GT_traits, empty_traits

### Paragraphs

In [3]:
folder_traits = "../../../data/OpenAI/Traits/"

# Trait dictionary, read from JSON.
with open(F"{folder_traits}Andrei.json", 'r') as f:
    caribbean_traits_dict = json.load(f)

folder_descriptions = "../../../data/OpenAI/DescriptionSnippets/"

# Open the pickles through context managers so each handle is closed as soon as
# it has been read; a bare pickle.load(open(...)) leaves the file open until
# the object happens to be garbage-collected.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a
# trusted source.
with open(F"{folder_descriptions}descriptions_paragraphs_caribbean.pkl", 'rb') as f:
    caribbean_description_paragraph_dict = pickle.load(f)
with open(F"{folder_descriptions}descriptions_sentences_caribbean.pkl", 'rb') as f:
    caribbean_description_sentence_dict = pickle.load(f)


### Prompting

In [97]:
# Join the strings stored under the first df_Andrei species key into a single
# space-separated string, then display it.
paragraph_all = ' '.join(caribbean_description_paragraph_dict[df_Andrei_species[0]])
paragraph_all

'Avicennia germinans, the black mangrove,[3] is a shrub or small tree growing up to 12 meters (39 feet) in the acanthus family, Acanthaceae. The leaves often appear whitish from the salt excreted at night and on cloudy days. The heartwood is dark-brown to black, while the sapwood is yellow-brown. Leaves are simple and opposite and grow from 2 to 3 inches long. The leaf is oval and pointed, and the margins are entire. The leaves appear smooth, thick, and leathery with a dark green topside and grey to white underside. When the tree is young the bark is smooth and as it matures the bark takes on a thick and fissured texture. The flowers appear at the ends of the branches and are small, white, and fragrant with yellow centers.  Pneumatophores often rise 5–10 cm from the long horizontal roots.\r\nBark dark gray or brown and smooth on small trunks, becoming dark brown,\r\nfissured, scaly, and thick.  Leaves opposite, lanceolate or narrowly\r\nelliptical, 5–11 cm long, 2–4 cm wide, acute or b

In [133]:
# Unpack the two lists returned for the first df_Andrei species.
# NOTE(review): the second value is the list of "<entity>:" strings built by
# extract_present_traits, not (entity, value) tuples. The saved `present_traits`
# output further down shows tuples plus 'None of them', so it looks like it was
# produced by an earlier version of the function — confirm and re-run.
GT_traits, present_traits = extract_present_traits(df_Andrei_species[0], df_Andrei)


In [111]:
# Instruction text that is interpolated at the end of the prompt; displayed for inspection.
question = "Given this list of tuples with plant traits, which of the traits are present in the presented text?"
question

'Given this list of tuples with plant traits, which of the traits are present in the presented text?'

In [112]:
# Assemble the prompt piece by piece: the description text, the trait list,
# then the question. Note the trailing space after each section label.
prompt = (
    "\n"
    "Text: \n"
    F"{paragraph_all}\n"
    "\n"
    "List of Tuples: \n"
    F"{present_traits}\n"
    "\n"
    F"{question}\n"
)
print(prompt)


Text: 
Avicennia germinans, the black mangrove,[3] is a shrub or small tree growing up to 12 meters (39 feet) in the acanthus family, Acanthaceae. The leaves often appear whitish from the salt excreted at night and on cloudy days. The heartwood is dark-brown to black, while the sapwood is yellow-brown. Leaves are simple and opposite and grow from 2 to 3 inches long. The leaf is oval and pointed, and the margins are entire. The leaves appear smooth, thick, and leathery with a dark green topside and grey to white underside. When the tree is young the bark is smooth and as it matures the bark takes on a thick and fissured texture. The flowers appear at the ends of the branches and are small, white, and fragrant with yellow centers.  Pneumatophores often rise 5–10 cm from the long horizontal roots.
Bark dark gray or brown and smooth on small trunks, becoming dark brown,
fissured, scaly, and thick.  Leaves opposite, lanceolate or narrowly
elliptical, 5–11 cm long, 2–4 cm wide, acute or blu

In [113]:
# Single-turn conversation: the whole prompt is sent as one user message.
messages = [dict(role="user", content=prompt)]

# Request a chat completion and keep the response object for the cells below.
completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

In [114]:
# Token counts reported in the response (completion, prompt, total).
completion["usage"]

<OpenAIObject at 0x7fe47ac168e0> JSON: {
  "completion_tokens": 250,
  "prompt_tokens": 714,
  "total_tokens": 964
}

In [115]:
# Print the message text of the first returned choice.
print(completion["choices"][0]["message"]["content"])



- Life form: tree
- Leaf position: opposite
- Leaf composition: entire
- Leaf shape: lanceolate, elliptic
- Leaf margin: entire
- Leaf upper side: salt crystals
- Leaf lower side: powdery (indirectly mentioned)
- Leaf glands: absent
- Thorns/spines: absent
- Stipules: absent
- Inflorescence type: panicle
- Sepals / calyx shape: cup-shaped
- Sepals / calyx number: 5
- Petals / corolla shape: tubular
- Petals / corolla number: 4
- Petals / corolla colour: white
- Stamen shape: longer than corolla (indirectly mentioned)
- Stamen number: 4
- Fruit type: capsule
- Fruit shape: flattened, elliptical
- Fruit colour: not mentioned
- Seed colour: green (assumed since no other color is mentioned)
- Leaf glands: absent (repeated)
- Thorns/spines: absent (repeated)
- Stipules: absent (repeated)

All of the traits mentioned in the list of tuples are present in the presented text.


In [116]:
# Display the current value of present_traits (the list interpolated into the prompt).
present_traits

[('Life form', 'tree'),
 ('Leaf position', 'opposite'),
 ('Leaf composition', 'entire'),
 ('Leaf shape', 'lanceolate, elliptic'),
 ('Leaf margin', 'entire'),
 ('Leaf upper side', 'salt crystals'),
 ('Leaf lower side', 'powdery'),
 ('Leaf glands', 'absent'),
 ('Thorns/spines', 'absent'),
 ('Stipules', 'absent'),
 ('Inflorescence type', 'panicle'),
 ('Sepals / calyx shape', 'free'),
 ('Sepals / calyx numer', '5'),
 ('Petals / corolla shape', 'tubular'),
 ('Petals / corolla number', '4'),
 ('Petals / corolla colour', 'white'),
 ('Stamen shape', 'longer than corolla'),
 ('Stamen number', '4'),
 ('Fruit type', 'capsule'),
 ('Fruit shape', 'flattened ovate'),
 ('Fruit colour', 'green'),
 ('Seed colour', 'green'),
 'None of them']

In [5]:
# Build one prompt fragment per plant property, listing its possible values
# followed by a sentinel option for "nothing found in the text".
# NOTE(review): the sentinel contains a typo ("ifnromation"). It is kept
# verbatim in case other code matches on the exact string — confirm before fixing.
prompt_list = []

for entity, quality_value in caribbean_traits_dict.items():
    # Concatenate into a new list instead of using `+=`: `+=` would extend the
    # list stored inside caribbean_traits_dict in place, so every re-run of
    # this cell would append one more sentinel to each property.
    quality_value = quality_value + ["No ifnromation in the text"]

    prompt = F"The plant property {entity}, with possible values: {quality_value}"
    prompt_list.append(prompt)

In [7]:
# Join the strings stored under the hard-coded key 'Avicennia germinans' into
# a single space-separated string, then display it.
prompt_text = ' '.join(caribbean_description_paragraph_dict['Avicennia germinans'])
prompt_text

'Avicennia germinans, the black mangrove,[3] is a shrub or small tree growing up to 12 meters (39 feet) in the acanthus family, Acanthaceae. The leaves often appear whitish from the salt excreted at night and on cloudy days. The heartwood is dark-brown to black, while the sapwood is yellow-brown. Leaves are simple and opposite and grow from 2 to 3 inches long. The leaf is oval and pointed, and the margins are entire. The leaves appear smooth, thick, and leathery with a dark green topside and grey to white underside. When the tree is young the bark is smooth and as it matures the bark takes on a thick and fissured texture. The flowers appear at the ends of the branches and are small, white, and fragrant with yellow centers.  Pneumatophores often rise 5–10 cm from the long horizontal roots.\r\nBark dark gray or brown and smooth on small trunks, becoming dark brown,\r\nfissured, scaly, and thick.  Leaves opposite, lanceolate or narrowly\r\nelliptical, 5–11 cm long, 2–4 cm wide, acute or b

In [8]:
# Question appended after the quoted text in the prompt; displayed for inspection.
prompt_question = "Can you extract all the semantic triples from the text?"
prompt_question

'Can you extract all the semantic triples from the text?'

In [9]:
# Assemble the prompt: the text wrapped in double quotes, a blank line, then
# the question.
prompt = (
    "\n"
    F'"{prompt_text}"\n'
    "\n"
    F"{prompt_question}\n"
)
print(prompt)


"Avicennia germinans, the black mangrove,[3] is a shrub or small tree growing up to 12 meters (39 feet) in the acanthus family, Acanthaceae. The leaves often appear whitish from the salt excreted at night and on cloudy days. The heartwood is dark-brown to black, while the sapwood is yellow-brown. Leaves are simple and opposite and grow from 2 to 3 inches long. The leaf is oval and pointed, and the margins are entire. The leaves appear smooth, thick, and leathery with a dark green topside and grey to white underside. When the tree is young the bark is smooth and as it matures the bark takes on a thick and fissured texture. The flowers appear at the ends of the branches and are small, white, and fragrant with yellow centers.  Pneumatophores often rise 5–10 cm from the long horizontal roots.
Bark dark gray or brown and smooth on small trunks, becoming dark brown,
fissured, scaly, and thick.  Leaves opposite, lanceolate or narrowly
elliptical, 5–11 cm long, 2–4 cm wide, acute or blunt at 

In [23]:
# Wrap the prompt as a one-message user conversation and echo it.
messages = [dict(role="user", content=prompt)]
messages

[{'role': 'user',
  'content': '\n"Avicennia germinans, the black mangrove,[3] is a shrub or small tree growing up to 12 meters (39 feet) in the acanthus family, Acanthaceae. The leaves often appear whitish from the salt excreted at night and on cloudy days. The heartwood is dark-brown to black, while the sapwood is yellow-brown. Leaves are simple and opposite and grow from 2 to 3 inches long. The leaf is oval and pointed, and the margins are entire. The leaves appear smooth, thick, and leathery with a dark green topside and grey to white underside. When the tree is young the bark is smooth and as it matures the bark takes on a thick and fissured texture. The flowers appear at the ends of the branches and are small, white, and fragrant with yellow centers.  Pneumatophores often rise 5–10 cm from the long horizontal roots.\r\nBark dark gray or brown and smooth on small trunks, becoming dark brown,\r\nfissured, scaly, and thick.  Leaves opposite, lanceolate or narrowly\r\nelliptical, 5–1

In [24]:
# Generate a response
completion = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=messages,
)

In [34]:
# Token counts reported in the response (completion, prompt, total).
completion["usage"]

<OpenAIObject at 0x7fe479739d00> JSON: {
  "completion_tokens": 287,
  "prompt_tokens": 478,
  "total_tokens": 765
}

In [32]:
# Print the message text of the first returned choice.
print(completion["choices"][0]["message"]["content"])



1. Plant species: Avicennia germinans
2. Family name: Acanthaceae
3. Tree height: up to 12 meters
4. Leaf characteristics: simple, opposite, oval, pointed, thick, leathery, dark green topside, grey to white underside
5. Bark color and texture: dark gray or brown, smooth on small trunks, becoming dark brown, fissured, scaly and thick
6. Flower characteristics: small, white, fragrant with yellow centers, several, crowded, sessile, tubular and hairy corolla, imperfectly 4-celled ovary, slender style, and 2-forked stigma
7. Fruit characteristics: Elliptical, flattened, 2.5–3 cm long, often splitting into 2 parts
8. Root characteristics: long horizontal roots with Pneumatophores often rising 5–10 cm
9. Leaf margin: entire
10. Leaf length: 2-3 inches
11. Salt excretion: leaves often appear whitish from the salt excreted at night and on cloudy days
12. Heartwood and sapwood color: dark-brown to black (heartwood), yellow-brown (sapwood)
13. Other features: fine hairs give a grayish hue to fo

In [37]:
# Display the per-property prompt fragments built earlier.
prompt_list

["The plant property Life form, with possible values: ['liana', 'tree', 'No ifnromation in the text']",
 "The plant property Leaf position, with possible values: ['alternate', 'alternate, opposite', 'opposite', 'opposite, whorls of 3', 'opposite, whorls of 3, alternate', 'No ifnromation in the text']",
 "The plant property Leaf composition, with possible values: ['3 palmate', '3-5 palmate', '3-5 pinnate', '3-5 pinnate, entire', '5-11 pinnate', '5-9 pinnate', 'bi-pinnate, 2 leaflets per jug', 'bi-pinnate, 20-40 leaflets per jug', 'bi-pinnate, 30-32 leaflets per jug', 'entire', 'pinnate, 4-6 leaflets', 'pinnate, 6-8 leaflets', 'No ifnromation in the text']",
 "The plant property Leaf shape, with possible values: ['elliptic', 'elliptic, elongate', 'elliptic, lanceolate', 'elliptic, obovate', 'elliptic, ovate', 'elliptic, ovate, round', 'elongate', 'elongate, elliptic, obovate', 'elongate, obovate', 'kidney-shaped, circular', 'lanceolate, elliptic', 'linear', 'linear, obovate', 'obovate', 