In [None]:
import openai
from openai import OpenAI
import os
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import ast


# Configure OpenAI Credentials

In [None]:
# Load variables from the nearest .env file into the process environment.
load_dotenv(find_dotenv())

SECRET_KEY = os.environ.get("SECRET_KEY")
DATABASE_PASSWORD = os.environ.get("DATABASE_PASSWORD")

# The API key must only ever come from the environment / .env file. Never paste
# a literal key into the notebook, not even in a comment: notebooks get shared
# and committed. Any key that was previously written here should be treated as
# compromised and rotated.
OPENAI_SECRET_KEY = os.getenv('OPENAI_SECRET_KEY')
if not OPENAI_SECRET_KEY:
    # Fail with an actionable message instead of the opaque TypeError that
    # assigning None into os.environ would raise.
    raise RuntimeError(
        "OPENAI_SECRET_KEY is not set; define it in the environment or in a .env file."
    )

# The client below is constructed without arguments, so the key is handed over
# through the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = OPENAI_SECRET_KEY

client = OpenAI()

# Prepare Data

In [None]:
# Read the workbook and keep the rows at positions 1..98 (all columns).
df = pd.read_excel('data.xlsx').iloc[1:99]

In [None]:
# Preview the first rows of the loaded slice.
df.head()

In [None]:
# Print the frame's column names, dtypes and non-null counts.
df.info()

In [None]:
# Tokenized sentences and their labels are taken from the columns pandas
# auto-named "Unnamed: 3" and "Unnamed: 4" (presumably the 4th and 5th
# spreadsheet columns - confirm against data.xlsx).
tokenized_sentences = df["Unnamed: 3"]
labels = df["Unnamed: 4"]

# Take the first row *by position*. The previous label lookup `[1]` only hit
# the first row because the frame happens to start at index label 1; `.iloc[0]`
# keeps working if the row slice or the index ever changes.
# Each cell is a string holding a Python literal, hence ast.literal_eval.
sentence = ast.literal_eval(tokenized_sentences.iloc[0])
label = ast.literal_eval(labels.iloc[0])



# Create Prompts

In [None]:
# Read the system prompt from disk. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open('prompt_1.txt', 'r') as prompt_file:
    prompt1 = prompt_file.read()

# Predict / Label 

In [None]:
class SemanticRoleLabeler:
  """Labels a sentence by prompting an OpenAI chat model.

  The system prompt given to the constructor is sent with every request. The
  model's reply is expected to end with a Python list literal, optionally
  wrapped in a ``` fence; that literal is parsed and returned as the labels.
  """

  def __init__(self, prompt: str):
    """Store the system prompt and the fixed request settings.

    Args:
      prompt: Text sent as the system message of every request.
    """
    self._seed = 42 # fixed seed, requested for reproducible results
    self._temperature = 0.0 # zero temperature, requested for deterministic results
    self._client = OpenAI()
    self._model = "gpt-3.5-turbo-1106"
    # self._model = "gpt-4-1106-preview"  # this is expensive
    self.system_prompt = prompt

  # MARK: - Public Methods
  def label(self, sentence) -> list:
    """Ask the model to label `sentence` and return the parsed labels.

    Args:
      sentence: Text sent as the user message (callers in this notebook pass
        the string form of a token list).

    Returns:
      The Python object parsed from the final literal in the model's reply.

    Raises:
      Whatever `ast.literal_eval` raises (ValueError / SyntaxError) when the
      reply does not end with a valid Python literal.
    """
    response = self._computeReponse(sentence)
    message = self._extract_message_from_response(response)
    string_valued_labels = self._extract_final_line(message)
    return self._convert_string_typed_labels_to_proper_List(string_valued_labels)

  # MARK: - Private Methods
  def _computeReponse(self, sentence):
    """Send the system prompt plus `sentence` to the chat completions endpoint."""
    response = self._client.chat.completions.create(
      seed=self._seed,
      temperature=self._temperature,
      model=self._model,
      # response_format={ "type": "json_object" },
      messages=[
        {"role": "system", "content": self.system_prompt},
        {"role": "user", "content": sentence},
      ],
      # stream=True,
    )
    return response

  def _extract_message_from_response(self, response) -> str:
    """Return the text content of the first choice in `response`."""
    # NOTE(review): the SDK may return None as content for some responses
    # (e.g. tool calls) - confirm whether that can occur with this prompt.
    return response.choices[0].message.content

  def _extract_final_line(self, message: str) -> str:
    """Return the text after the last opening ``` fence, without the closing fence.

    Surrounding whitespace is stripped first. Without that, a reply ending in
    a newline after the closing fence would split into an empty last segment
    and make the subsequent literal parsing fail.
    """
    trimmed = message.strip()
    # Keep what follows the last "```\n", then drop the trailing "\n```".
    final_segment = trimmed.split("```\n")[-1].replace("\n```", "")
    return final_segment.strip()

  def _convert_string_typed_labels_to_proper_List(self, message: str) -> list:
    """Parse the string form of the labels into a Python object."""
    # literal_eval only accepts Python literals, so it is safe on model output.
    return ast.literal_eval(message)


In [None]:
# Build the labeler with the system prompt read from prompt_1.txt.
# NOTE(review): `Labeler` is an instance but is capitalised like a class;
# renaming it would have to be done in every cell that uses it.
Labeler = SemanticRoleLabeler(prompt=prompt1)


In [None]:
# The example sentence is passed in its string form because it becomes the
# text content of the user message sent to the model.
sentence_labels = Labeler.label(str(sentence))

# Visualize

In [None]:
import spacy
from spacy import displacy
from spacy.tokens import Span

# set_of_desired_tags = {'Recipient', 'O', 'Object', 'Actor', 'Action', 'Precondition'}


class SpacyLabelVisualizer:
    """Renders per-token labels as highlighted entities with spaCy's displaCy."""

    # Default highlight colour per label. Kept as a class-level template and
    # copied for each instance, so instances never share one mutable dict.
    _DEFAULT_LABEL_COLORS = {
        "Recipient": "aquamarine",
        "Object": "tomato",
        "Actor": "pink",
        "P": "yellow", # note that the number of tokens we generate influences costs, so ideally we use one letter labels.
        "Action": "green",
    }

    def __init__(self,
                 label_color_dict: "dict | None" = None,
                 nil_label: str = "O"):
        """Configure label colours and the label that means "no entity".

        Args:
            label_color_dict: Mapping of label name to colour. When omitted, a
                fresh copy of the default colour table is used.
            nil_label: Label marking tokens that should not be highlighted.
        """
        if label_color_dict is None:
            # A mutable default argument would be shared by every instance.
            label_color_dict = dict(self._DEFAULT_LABEL_COLORS)
        self.nil_label = nil_label
        self._label_color_dict = label_color_dict
        self._possible_labels = list(label_color_dict.keys())

    def create_displacy_dict(self, words, labels):
        """Build the dict consumed by displaCy's manual entity rendering.

        Args:
            words: Tokens of the sentence, in order.
            labels: One label per token, aligned with `words`. If the two
                differ in length, the extra items of the longer one are ignored.

        Returns:
            A dict with "text" (the tokens joined by single spaces), "ents"
            (one character span per token whose label is not the nil label)
            and "title" (None).
        """
        entities = []
        start = 0
        for word, label in zip(words, labels):
            end = start + len(word)
            if label != self.nil_label:
                entities.append({"start": start, "end": end, "label": label})
            # Skip the single space that separates tokens in the joined text.
            start = end + 1

        return {
            "text": ' '.join(words),
            "ents": entities,
            "title": None,
        }

    def display(self, words, labels):
        """Render the labelled sentence with displaCy and return its result.

        NOTE(review): inside a notebook displaCy may display the markup itself
        and return None - confirm against the spaCy documentation.
        """
        displacy_dict = self.create_displacy_dict(words, labels)
        options = {"colors": self._label_color_dict}
        return displacy.render(displacy_dict, style="ent", manual=True, options=options)



# Use the default label colours and highlight the model's labels on the
# example sentence.
visualizer = SpacyLabelVisualizer()
visualizer.display(sentence, sentence_labels)

# Compute Confusion Matrices