In [None]:
import openai
from openai import OpenAI
import os
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import ast

# TODO: there must be a cleaner way to use local packages with poetry, but let's use this quick hack for now...
import sys
sys.path.append('../../')  # Adjust the path as needed
from utils.spacy_visualizer import SpacyLabelVisualizer


# Configure OpenAI Credentials

In [None]:
# Load variables from the nearest .env file into the process environment.
load_dotenv(find_dotenv())
OPENAI_SECRET_KEY = os.getenv('OPENAI_SECRET_KEY')
# Fail early with a readable message: assigning None to os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError.
if not OPENAI_SECRET_KEY:
    raise RuntimeError(
        "OPENAI_SECRET_KEY is not set; define it in your .env file or environment."
    )
# Re-export under OPENAI_API_KEY, the variable name the OpenAI client looks up
# when it is constructed without an explicit api_key.
os.environ["OPENAI_API_KEY"] = OPENAI_SECRET_KEY

# Prepare Data

In [None]:
# Read the spreadsheet and keep positional rows 1..98 (row 0 is dropped),
# with every column retained.
df = (
    pd.read_excel('data.xlsx')
    .iloc[1:99, :]
)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

# Create Prompts

In [None]:
# Read the prompt text; the context manager closes the file handle as soon as
# the content has been read instead of leaving it open for the kernel's lifetime.
with open('prompt_1.txt', 'r') as prompt_file:
    prompt1 = prompt_file.read()

# Predict / Label 

In [None]:
class SemanticRoleLabeler:
  """Label a sentence with semantic roles using an OpenAI chat model.

  The system prompt given at construction time is sent with every request and
  the sentence to label is sent as the user message. The final part of the
  model's reply (optionally wrapped in a triple-backtick fence) is parsed as a
  Python literal with ``ast.literal_eval``.
  """

  def __init__(self, prompt: str):
    """Create the OpenAI client and store the request settings.

    Args:
      prompt: System prompt sent with every labeling request.
    """
    self._seed = 42  # ask the API for reproducible results
    self._temperature = 0.0  # make the output as deterministic as possible
    self._client = OpenAI()
    self._model = "gpt-3.5-turbo-1106"
    # self._model = "gpt-4-1106-preview"  # this is expensive
    self.system_prompt = prompt

  # MARK: - Public Methods
  def label(self, sentence) -> list:
    """Return the labels the model predicts for ``sentence``.

    Args:
      sentence: Text sent to the model as the user message.

    Returns:
      The value parsed from the final part of the model's reply, or ``[]``
      when the reply is empty or cannot be parsed.
    """
    response = self._computeReponse(sentence)
    message = self._extract_message_from_response(response)
    string_valued_labels = self._extract_final_line(message)
    return self._convert_string_typed_labels_to_proper_List(string_valued_labels)

  # MARK: - Private Methods
  def _computeReponse(self, sentence):
    """Send the system prompt and ``sentence`` to the chat completions API.

    Returns:
      The response object returned by ``chat.completions.create``.
    """
    return self._client.chat.completions.create(
      seed=self._seed,
      temperature=self._temperature,
      model=self._model,
      messages=[
        {"role": "system", "content": self.system_prompt},
        {"role": "user", "content": sentence},
      ],
    )

  def _extract_message_from_response(self, response) -> str:
    """Return the text of the first choice, or ``""`` when it has none."""
    content = response.choices[0].message.content
    # The content field can be None; normalize it so the string handling
    # downstream reports a parse failure instead of raising AttributeError.
    return content if content is not None else ""

  def _extract_final_line(self, message: str) -> str:
    """Return the text that follows the last opening code fence in ``message``.

    A closing fence and surrounding whitespace are removed. When the message
    contains no fence, the whole (stripped) message is returned.
    """
    # Strip first: a newline after the closing fence would otherwise make the
    # last split segment empty and discard the answer.
    final_segment = message.strip().split("```\n")[-1]
    return final_segment.replace("\n```", "").strip()

  # NOTE(review): an earlier comment marked this method as deprecated (used for
  # sentence-level labeling), but label() still calls it.
  def _convert_string_typed_labels_to_proper_List(self, message: str) -> list:
    """Parse ``message`` as a Python literal.

    Returns:
      The parsed value, or ``[]`` (after printing the offending message) when
      ``message`` is not a valid literal.
    """
    try:
      return ast.literal_eval(message)
    # Only the errors literal_eval is documented to raise on malformed input;
    # a bare except would also swallow KeyboardInterrupt and SystemExit.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
      print("Error: could not convert string to list")
      print(f"Received message: \n\n {message}")
      return []


# Predict Labels

In [None]:
# Build the labeler, using the text read from prompt_1.txt as its system prompt.
Labeler = SemanticRoleLabeler(prompt=prompt1)

In [None]:
# Column "Unnamed: 3" is used as the tokenized sentences and "Unnamed: 4" as
# the matching labels (presumably the 4th and 5th sheet columns — TODO confirm).
tokenized_sentences = df["Unnamed: 3"]
labels = df["Unnamed: 4"]

# Pick one example and parse both cells from their string form into Python
# objects. NOTE(review): [10] is looked up by index label, not position,
# assuming the frame kept its default integer index — confirm.
sentence = ast.literal_eval(tokenized_sentences[10])
label = ast.literal_eval(labels[10])
print(sentence)
print(label)



In [None]:
# Send the example (as the string form of the parsed sentence) to the model
# and keep the labels it predicts.
sentence_labels = Labeler.label(str(sentence))

# Visualize

In [None]:
# Project-local helper imported from utils.spacy_visualizer.
visualizer = SpacyLabelVisualizer()

In [None]:
# Presumably renders the example sentence with the predicted labels — confirm
# against SpacyLabelVisualizer.display.
visualizer.display(sentence, sentence_labels)

# Compute Confusion Matrices