In [None]:
#! pip install openai

In [None]:
# Notebook version metadata: bump both values together whenever the notebook changes.
script_version = "0.0.1"  # version string of this notebook
version_modified = "2024-02-07"  # date of the last change (YYYY-MM-DD)

# -------------------
# Imports
# -------------------

import json
import time
import warnings
from pathlib import Path

import pandas as pd
from openai import OpenAI

# -------------------
# Global variables
# -------------------

# Base prompt that is prepended to the text of every request. It lists the entity
# categories to extract and shows the JSON shape expected for each title.
# NOTE(review): the prompt says titles are comma-delimited, but a single-column
# to_csv() output puts one title per line; consider rewording after testing the
# effect on model output.
BASE_PROMPT = """Below is a CSV file that contains titles of artworks. Each title is delineated by a comma. For each artwork title, give me the list of:
- object named entity
- location named entity
- person named entity
- genre named entity
- miscellaneous named entity.
For each title, format the output in json with the following keys:
- TITLE for the title of the artwork
- OBJECT for object named entity
- LOCATION for location named entity
- PERSON for person named entity
- GENRE for genre named entity
- MISCELLANEOUS for miscellaneous named entity.
Example below: 
- Title 1: "Portrait of the Artist Pablo Picasso with a Guitar in Paris"
- Output:
{
    "TITLE": "Portrait of the Artist Pablo Picasso with a Guitar in Paris",
    "OBJECT": ["guitar"],
    "LOCATION": ["Paris"],
    "PERSON": ["Pablo Picasso"],
    "GENRE": ["portrait"],
    "MISCELLANEOUS": ["artist"]
}
"""

# Keyword arguments passed to every chat-completion request.
# max_tokens caps the length of the reply, so long inputs can produce a
# truncated (and therefore invalid) JSON answer.
OPENAI_QUERY_PARAMS = {
    "model": "gpt-3.5-turbo",
    "temperature": 0,
    "max_tokens": 1024
}

# The API key is NOT stored in this notebook: it is read from a plain-text file
# with this name located in the user's home directory.
openai_key_filename = 'open_ai_api_key_text_analysis.txt'

home = str(Path.home()) # gets path to home directory; supposed to work for both Win and Mac

# 'utf-8-sig' reads plain UTF-8/ASCII files and also drops a leading byte-order
# mark, which some Windows editors add and which strip() would not remove.
with open(Path(home) / openai_key_filename, 'r', encoding='utf-8-sig') as file:
    api_key_string = file.read().strip() # remove any leading or trailing white space or newlines

# Shared API client used by ask_openai().
CLIENT = OpenAI(api_key=api_key_string)


In [None]:
# -------------------
# Function definitions
# -------------------

def ask_openai(prompt: str, base_prompt=BASE_PROMPT, openai_query_params=OPENAI_QUERY_PARAMS) -> str:
    """Send one chat-completion request that performs named entity recognition.

    The request contains a fixed system message describing the NER task and a
    user message made of ``base_prompt + prompt``.

    Args:
        prompt: Text to analyse; appended verbatim to ``base_prompt``. With the
            default base prompt this is the CSV text of artwork titles.
        base_prompt: Instructions placed in front of ``prompt`` in the user message.
        openai_query_params: Keyword arguments forwarded unchanged to
            ``CLIENT.chat.completions.create`` (model, temperature, max_tokens, ...).

    Returns:
        The text content of the first choice in the response, or an empty string
        when the response carries no text content. The default prompt asks for
        JSON, but the reply is not parsed or validated here.

    Warns:
        RuntimeWarning: when the reply stopped because the token limit was
            reached, i.e. the returned text is incomplete.
    """
    system_message = {
        "role": "system",
        "content": "You are a smart and intelligent Named Entity Recognition (NER) system whose job is to extract entities from the title of an artwork. You will look for people, location names, and common objects. You will also look for genres of visual art and any miscellaneous entities. The labels in your output should not include words that are not in the title."
    }
    user_message = {
        "role": "user",
        "content": base_prompt + prompt
    }

    response = CLIENT.chat.completions.create(
        messages=[system_message, user_message],
        **openai_query_params
    )

    first_choice = response.choices[0]

    # A finish reason of "length" means the reply was cut off at the token limit,
    # so any JSON in it is most likely incomplete. Tell the caller instead of
    # silently returning the partial text.
    if first_choice.finish_reason == "length":
        warnings.warn(
            "OpenAI response was truncated at the token limit; the output is incomplete. "
            "Send fewer titles per request or raise max_tokens.",
            RuntimeWarning,
            stacklevel=2,
        )

    # message.content may be None; honour the declared str return type.
    return first_choice.message.content or ""


In [None]:
# Smoke test: send one short passage through the NER helper and print the raw reply.
example_text = (
    "Vanderbilt University is a private research university in Nashville, Tennessee. "
    "It was founded in 1873."
)
print(ask_openai(example_text))

In [None]:
# Load the artwork metadata table and the table of image records.
works_csv_path = f"{home}/GitHub/vandycite/gallery_buchanan/works_multiprop.csv"
image_urls_csv_path = f"{home}/GitHub/vandycite/gallery_buchanan/image_analysis/object_localization_image_urls.csv"
title_pd = pd.read_csv(works_csv_path)
url_pd = pd.read_csv(image_urls_csv_path)

# Only the title and the inventory number are needed from the works table.
title_pd = title_pd.loc[:, ['title', 'inventory_number']]

# Keep image records whose status is 'uploaded', then keep the works whose
# inventory number appears among those records' accession numbers.
is_uploaded = url_pd['status'].eq('uploaded')
url_pd = url_pd.loc[is_uploaded]
has_uploaded_image = title_pd['inventory_number'].isin(url_pd['accession_number'])
title_pd = title_pd.loc[has_uploaded_image]

# The titles alone are what gets sent to OpenAI.
title_pd_slice = title_pd['title']

In [None]:
# Run NER on the selected titles: serialise them as CSV text without the index
# or a header row, send that text to OpenAI, and print the raw reply.
titles_csv = title_pd_slice.to_csv(index=False, header=False)
ner_response = ask_openai(titles_csv)
print(ner_response)