# NLQ with OpenAI Assistants API

## Setup logging

In [None]:
import logging
import sys

# Configure the root logger, so the format below applies to every record that
# propagates up to it.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Start from a clean slate: remove whatever handlers are currently attached
# before installing the single stdout handler.
logger.handlers.clear()

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
logger.addHandler(handler)

# Keep the OpenAI client and its HTTP libraries quiet: only their errors are shown.
for noisy_logger_name in ("openai", "httpx", "httpcore"):
    logging.getLogger(noisy_logger_name).setLevel(logging.ERROR)

## Create chat assistant

### Configuration

### Run GraphDB

You need a running GraphDB instance. This tutorial shows how to run the database locally using the GraphDB Docker image. It provides a Docker Compose setup that populates GraphDB with the Star Wars dataset.

- Install [Docker](https://docs.docker.com/get-docker/). This tutorial was created using Docker version 28.0.1, which bundles Docker Compose. For earlier Docker versions, you may need to install Docker Compose separately.
- Start GraphDB by running the following commands from the `docker` folder:

```
docker build --tag graphdb .
docker compose up -d graphdb
```

Wait a couple of seconds for the database to start at http://localhost:7200/. The Star Wars dataset `starwars-data.ttl` is automatically loaded into the `starwars` repository. You can run queries against the local SPARQL endpoint http://localhost:7200/repositories/starwars. You can also open the GraphDB Workbench in your favourite web browser at http://localhost:7200/sparql, where you can run queries interactively.

#### GraphDB

In [None]:
from base64 import b64encode

from ttyg.graphdb import GraphDB

# Address of the GraphDB instance and the repository to query.
graphdb_base_url = "http://localhost:7200"
graphdb_repository_id = "starwars"

# Unsecured GraphDB: the base URL and the repository id are all that is passed.
graph = GraphDB(base_url=graphdb_base_url, repository_id=graphdb_repository_id)

# Secured GraphDB: use the auth_header parameter to pass the value of the
# "Authorization" header. The example below uses basic authentication;
# uncomment it in place of the call above.
# username, password = "admin", "root"
# graph = GraphDB(
#     base_url=graphdb_base_url,
#     repository_id=graphdb_repository_id,
#     auth_header="Basic " + b64encode(f"{username}:{password}".encode("ascii")).decode(),
# )

#### OpenAI

In [None]:
import openai

from ttyg.utils import set_env

# The client below is created without an explicit key, so the key has to be
# available as the OPENAI_API_KEY environment variable first.
# NOTE(review): set_env presumably asks for the value when it is not set yet —
# confirm against ttyg.utils.
set_env("OPENAI_API_KEY")
openai_client = openai.OpenAI()

# Model settings handed to the assistant when it is created.
llm_model = "gpt-4o"
llm_temperature = 0

#### Azure OpenAI

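Alternatively, you can use Azure OpenAI. Run the cell below instead of the OpenAI cell above; it defines the same `llm_model`, `llm_temperature` and `openai_client` variables.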

In [None]:
import openai

from ttyg.utils import set_env

# Model settings handed to the assistant when it is created.
# NOTE(review): with Azure OpenAI the model value usually has to match the
# name of your deployment — confirm for your resource.
llm_model = "gpt-4o"
llm_temperature = 0

# "{workspace-id}" is a literal placeholder (this is not an f-string); replace
# it with the endpoint of your own Azure OpenAI resource before running.
azure_endpoint = "https://{workspace-id}.openai.azure.com/"
api_version = "2024-05-01-preview"

# The client is created without an explicit key, so the key has to be
# available as the AZURE_OPENAI_API_KEY environment variable first.
set_env("AZURE_OPENAI_API_KEY")
openai_client = openai.AzureOpenAI(
    api_version=api_version,
    azure_endpoint=azure_endpoint,
)

#### Assistant

##### Tools

In [None]:
from pathlib import Path

from ttyg.tools import (
    FTSTool,
    IRIDiscoveryTool,
    NowTool,
    RetrievalQueryTool,
    SimilaritySearchQueryTool,
    SparqlQueryTool,
)

# Ontology schema file from the `docker` folder, given relative to the current
# working directory.
ontology_schema_file_path = Path("..", "docker", "SWAPI-ontology.ttl")
sparql_query_tool = SparqlQueryTool(
    ontology_schema_file_path=ontology_schema_file_path,
    graph=graph,
)

# Full-text search (FTS) must be enabled for the repository in order to use this tool.
# For details on how to enable it, see
# https://graphdb.ontotext.com/documentation/10.8/full-text-search.html#simple-full-text-search-index .
# It is also recommended to compute the RDF rank for the repository; see
# https://graphdb.ontotext.com/documentation/10.8/ranking-results.html .
fts_tool = FTSTool(graph=graph)

# Same prerequisites as the FTS tool above: FTS must be enabled for the
# repository, and computing the RDF rank is recommended.
iri_discovery_tool = IRIDiscoveryTool(graph=graph)

# A ChatGPT Retrieval Plugin Connector must exist in order to use this tool.
# To set up the ChatGPT Retrieval Connector Tool with an open source LLM,
# contact Graphwise, doing business as Ontotext, for additional help.
# retrieval_connector_name = "retrievalConnector"
# retrieval_query_tool = RetrievalQueryTool(
#     graph=graph,
#     connector_name=retrieval_connector_name,
# )

# A similarity index with this name must exist in order to use this tool.
similarity_index_name = "similarityIndex"
similarity_score_threshold = 0.9
similarity_query_tool = SimilaritySearchQueryTool(
    index_name=similarity_index_name,
    similarity_score_threshold=similarity_score_threshold,
    graph=graph,
)

now_tool = NowTool()

In [None]:
from ttyg.agents import OpenAIAssistant
from ttyg.tools import Toolkit

# System instructions for the assistant; the ontology schema loaded by the
# SPARQL tool is embedded in them, serialized as Turtle.
instructions = f"""You are a natural language querying assistant, and you answer the users' questions.
If you need to write a SPARQL query, use only the classes and properties provided in the schema and don't invent or guess any.
Include all the prefixes from the ontology schema in the SPARQL queries.
The ontology schema in turtle format to use in SPARQL queries is:
```turtle
{sparql_query_tool.schema_graph.serialize(format='turtle')}
```
"""

# Tools made available to the assistant. Uncomment retrieval_query_tool here
# once it is created in the tools cell.
assistant_tools = [
    sparql_query_tool,
    fts_tool,
    iri_discovery_tool,
    # retrieval_query_tool,
    similarity_query_tool,
    now_tool,
]

assistant = OpenAIAssistant.create(
    openai_client=openai_client,
    model=llm_model,
    temperature=llm_temperature,
    instructions=instructions,
    toolkit=Toolkit(assistant_tools),
)

## Conversation

In [None]:
user_question = "How many Star Wars movies are there"

# Open a new conversation thread; its id is reused by the follow-up question
# and by the thread clean-up cell.
thread_id = assistant.create_thread().id
# Log through the logger configured in the first cell and let logging
# interpolate the id lazily instead of pre-formatting it with an f-string.
logger.debug("Started new thread %s", thread_id)

# Post the question on the thread, run the assistant and show its answer.
response = assistant.create_message_and_run(thread_id, user_question)
print(response)

In [None]:
# Follow-up question, posted on the same thread as the previous one so that
# the earlier exchange is part of the conversation.
user_question = "How many awards each of them received"
response = assistant.create_message_and_run(thread_id, user_question)

print(response)

## Delete the thread

In [None]:
from openai.types.beta import ThreadDeleted

# Delete the conversation thread and report the outcome.
thread_deleted: ThreadDeleted = assistant.delete_thread(thread_id)
# Log through the logger configured in the first cell, with lazy %-style
# arguments instead of an f-string; the emitted text is unchanged.
logger.info("Thread %s is deleted ? %s", thread_deleted.id, thread_deleted.deleted)

## Delete the Assistant

In [None]:
from openai.types.beta import AssistantDeleted

# Delete the assistant and report the outcome.
assistant_deleted: AssistantDeleted = assistant.delete_assistant()
# Log through the logger configured in the first cell, with lazy %-style
# arguments instead of an f-string; the emitted text is unchanged.
logger.info("Assistant %s is deleted ? %s", assistant_deleted.id, assistant_deleted.deleted)

When you're finished playing with NLQ with GraphDB, you can shut down the Docker environment by running 
```
docker compose down -v --remove-orphans
```
from the `docker` directory.