This notebook walks through an example of how to use the Aryn Partitioning Service with OpenAI's gpt-4o-mini model to transform process flow diagrams (PFDs) into a directed acyclic graph (DAG) that is loaded into neo4j.

We first use the Aryn SDK to pull out an image of the PFD from a page. We then send that diagram to OpenAI's GPT model and ask it to transform the contents of that flow into a DAG expressed as nodes and edges in JSON, which are then loaded into neo4j.

You will need an Aryn API key, an OpenAI API key, and a neo4j AuraDB instance to run this notebook. You can obtain an Aryn API key [here](https://www.aryn.ai/get-started), an OpenAI API key [here](https://platform.openai.com/api-keys), and a neo4j AuraDB instance [here](https://neo4j.com/product/auradb/).

In [None]:
# Install the Aryn SDK (provides the aryn_sdk.partition helpers used below)
!pip install aryn-sdk

# Install Poppler utils so the PDF can be displayed
!apt-get install poppler-utils

# Install the OpenAI SDK
!pip install openai

# Install pydantic (used to describe the JSON shape requested from the model)
!pip install pydantic

# Install the neo4j Python driver
!pip install neo4j

In [None]:
# Import necessary functions
import aryn_sdk
from aryn_sdk.partition import partition_file, draw_with_boxes, convert_image_element, table_elem_to_dataframe
import json
import pdf2image
from openai import OpenAI
from pydantic import BaseModel
from openai.lib._parsing import type_to_response_format_param as pydantic_to_response_format
from neo4j import GraphDatabase
from google.colab import userdata

In [None]:
# Set your secrets in the Colab notebook: open the left pane, choose the key
# option to add each secret, and make sure Notebook access is enabled for it.
# The names passed to userdata.get() below are the secret names to create.

# Visit https://www.aryn.ai/get-started to get a key.
aryn_api_key = userdata.get('aryn_api_key')

# Visit https://platform.openai.com/api-keys to get a key.
openai_api_key = userdata.get('openai_api_key')

# Visit https://neo4j.com/product/auradb/ to get the neo4j URI, username, and password.
neo4j_uri = userdata.get('NEO4J_URI')
neo4j_username = userdata.get('NEO4J_USERNAME')
neo4j_password = userdata.get('NEO4J_PASSWORD')

In [None]:
# Get the sample PFD files from Aryn's public S3 bucket.
# Each line downloads its file only when it is not already present locally.
![ -f PFD2.pdf ] || wget https://aryn-public.s3.amazonaws.com/partitioner-blog-data/PFD2.pdf
![ -f PFD_2017.pdf ] || wget https://aryn-public.s3.amazonaws.com/partitioner-blog-data/PFD_2017.pdf
![ -f PFD_2018.pdf ] || wget https://aryn-public.s3.amazonaws.com/partitioner-blog-data/PFD_2018.pdf

In [None]:
# Name of the PDF used in the step-by-step walkthrough below
file_name = 'PFD2.pdf'

# Render the PDF with pdf2image and show the first item of the result
pdf2image.convert_from_path(file_name)[0]

In [None]:
# Open the file in binary mode. The `with` block closes the handle as soon as
# the partitioning call has returned, so it is not left open for the rest of
# the notebook session.
with open(file_name, 'rb') as file:
    ## Make a call to the partitioning service to break down the file into its constituent
    ## components: images, tables, and captions. The document's elements are returned in a Python dict.
    document_dict = partition_file(file, aryn_api_key, extract_images=True, extract_table_structure=True, use_ocr=True, threshold=0.20)

# Show the PDF with the bounding boxes of the elements superimposed.
draw_with_boxes(file_name, document_dict)[0]

In [None]:
## Collect the table elements from the returned document dict.
## NOTE(review): the type strings are compared exactly; 'table' is lowercase here
## while 'Image' below is capitalized -- confirm both match what the service returns.
tables = [e for e in document_dict['elements'] if e['type'] == 'table']

## Convert the first table to a pandas dataframe and display it.
table_df = table_elem_to_dataframe(tables[0])
display(table_df)
print("\n")

## Collect the image elements from the returned document dict and show the first one
images = [e for e in document_dict['elements'] if e['type'] == 'Image']
images[0]

In [None]:
## Convert the first image element to a PIL image and display it to see it more closely.
pil_img = convert_image_element(images[0], format='PIL')
display(pil_img)

In [None]:
# Set up Pydantic models for a labeled DAG (directed acyclic graph). They are
# converted below into the response_format passed to the OpenAI chat completion
# requests, which use the gpt-4o-mini model.

# A single vertex of the DAG.
class node(BaseModel):
    label: str  # the prompts ask the model to keep this unique per node
    description: str

# A directed connection between two vertices.
class edge(BaseModel):
    source: str  # matched against node labels when the graph is loaded into neo4j
    target: str  # matched against node labels when the graph is loaded into neo4j
    description: str

# The whole diagram: every node plus every edge.
class diagram_DAG(BaseModel):
    nodes: list[node]
    edges: list[edge]

# Turn the top-level model into the response_format parameter for the OpenAI requests.
response_format = pydantic_to_response_format(diagram_DAG)

In [None]:
# Convert the first image to a base64-encoded JPEG so it can be sent to OpenAI inline.
jpeg_img = convert_image_element(images[0], format='JPEG', b64encode=True)

# Execute a completion request to gpt-4o-mini and print the response.
openai_client = OpenAI(api_key=openai_api_key)

response = openai_client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {
      "role": "user",
      "content": [
        {"type": "text",
         "text": """Transform the following PFD (process flow) diagram into a DAG and return it in JSON.
                    Ensure each node has a unique label. Ignore the labeled circles.
                 """},
        {
          "type": "image_url",
          "image_url": {"url": f"data:image/jpeg;base64,{jpeg_img}", "detail": "high"},
        },
      ],
    }
  ],
  # Ask OpenAI to respond with JSON in the format built from the diagram_DAG model
  response_format = response_format,
  # NOTE(review): a reply cut off at this limit would presumably fail to parse
  # as JSON below -- confirm the limit is large enough for the diagram.
  max_tokens=1000,
)

# The response from gpt-4o-mini will be a JSON string
DAG_json = response.choices[0].message.content
# print("Returned JSON:")
# print(DAG_json)
# print("\n")

# Load the JSON response from gpt-4o-mini into a dict and print it
DAG_dict = json.loads(DAG_json)
print("Python DAG:")
print("Nodes:")
for n in DAG_dict['nodes']:
  print(f"  \"{n['label']}\": \"{n['description']}\"")
print("Edges:")
for e in DAG_dict['edges']:
  print(f"  \"{e['source']}\" -> \"{e['target']}\": \"{e['description']}\"")
print("\n")

In [None]:
# This function combines all of the above mentioned steps

def convert_pfd_to_dag(pfd_fn: str, openai_client: OpenAI) -> dict:
  """Turn the first image of a PFD PDF into a DAG dict using gpt-4o-mini.

  The PDF is partitioned with the Aryn service, its first 'Image' element is
  displayed and sent to the model as a base64 JPEG, and the JSON reply is
  parsed and printed. Relies on the notebook-level `aryn_api_key` and
  `response_format` values.

  Args:
    pfd_fn: Path of the PDF file that contains the process flow diagram.
    openai_client: OpenAI client used for the chat completion request.

  Returns:
    The model's JSON reply parsed into a dict; its 'nodes' and 'edges'
    entries are printed before returning.

  Raises:
    IndexError: If partitioning yields no 'Image' element for the file.
  """
  ## Make a call to the partitioning service to break down the file into its constituent
  ## components: images, tables, and captions. The `with` block closes the
  ## file handle once the call returns instead of leaking it on every call.
  with open(pfd_fn, 'rb') as pfd_file:
    pfd_dict = partition_file(pfd_file, aryn_api_key, extract_images=True, extract_table_structure=True, use_ocr=True, threshold=0.20)

  ## Keep only the image elements; the first one is treated as the diagram.
  pfd_images = [e for e in pfd_dict['elements'] if e['type'] == 'Image']
  if not pfd_images:
    # Same exception type the bare [0] lookup would raise, with a clearer message.
    raise IndexError(f"no Image element found in {pfd_fn!r}")
  first_image = pfd_images[0]

  print("Converting this PFD diagram: ")
  ## Convert the image to PIL to see it more closely.
  pfd_pil_img = convert_image_element(first_image, format='PIL')
  display(pfd_pil_img)

  # Convert the image to a base64-encoded JPEG for gpt-4o-mini to process it.
  pfd_jpeg = convert_image_element(first_image, format='JPEG', b64encode=True)

  print("Calling gpt-4o: ")
  response = openai_client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
      {
        "role": "user",
        "content": [
          {"type": "text",
           "text": """Transform the following PFD (process flow) diagram into a DAG and return it in JSON.
                      Ensure each node has a unique label. """},
          {"type": "image_url",
           "image_url": {"url": f"data:image/jpeg;base64,{pfd_jpeg}", "detail": "high"},},
        ],
      }
    ],

    # Ask OpenAI to respond with JSON in the notebook-level response_format
    response_format=response_format,
    max_tokens=5000,
  )

  # The response from gpt-4o-mini will be a JSON string
  pfd_json = response.choices[0].message.content

  # Load the JSON response into a dict and print it
  pfd_DAG = json.loads(pfd_json)
  print("Python DAG:")
  print("Nodes:")
  for n in pfd_DAG['nodes']:
    print(f"  \"{n['label']}\": \"{n['description']}\"")
  print("Edges:")
  for e in pfd_DAG['edges']:
    print(f"  \"{e['source']}\" -> \"{e['target']}\": \"{e['description']}\"")
  print("\n")

  return pfd_DAG

In [None]:
# Convert the 2017 and 2018 PFD files into DAG dicts, reusing the OpenAI client created earlier
pfd_2017 = convert_pfd_to_dag('PFD_2017.pdf', openai_client)
pfd_2018  = convert_pfd_to_dag('PFD_2018.pdf', openai_client)

In [None]:
# Create the neo4j driver from the URI and credentials read from the notebook secrets
driver = GraphDatabase.driver(uri=neo4j_uri, auth=(neo4j_username, neo4j_password))

def _cypher_escape(value):
    """Escape *value* for use inside a single-quoted Cypher string literal."""
    # Backslashes go first so the backslash added in front of each quote is
    # not itself doubled by the second replacement.
    return str(value).replace("\\", "\\\\").replace("'", "\\'")


def convert_graph_to_neo4j(pfd):
    """Build the Cypher statements that load a DAG dict into neo4j.

    The labels and descriptions are model-generated text, so each value is
    escaped before it is placed in a quoted Cypher literal; otherwise an
    apostrophe or backslash would break (or alter) the statement. Values
    without those characters produce exactly the same statements as before.

    Args:
        pfd: Dict with a "nodes" list (items with "label" and "description")
            and an "edges" list (items with "source", "target" and
            "description").

    Returns:
        A ``(nodes_prompts, edges_prompts)`` tuple of lists of Cypher query
        strings: one MERGE per node, and one MATCH/MERGE per edge that links
        the nodes whose labels equal the edge's source and target.
    """
    nodes_prompts = []
    for node in pfd["nodes"]:
        label = _cypher_escape(node["label"])
        description = _cypher_escape(node["description"])
        query = f"MERGE (n:Node {{label: '{label}', description: '{description}'}})"
        nodes_prompts.append(query)

    edges_prompts = []
    for edge in pfd["edges"]:
        source_label = _cypher_escape(edge["source"])
        target_label = _cypher_escape(edge["target"])
        description = _cypher_escape(edge["description"])

        query = f"""
        MATCH (n:Node {{label: '{source_label}'}})
        MATCH (k:Node {{label: '{target_label}'}})
        WITH n, k
        WHERE n IS NOT NULL AND k IS NOT NULL
        MERGE (n)-[:Edge {{description: '{description}'}}]->(k)
        """
        edges_prompts.append(query)

    return nodes_prompts, edges_prompts

In [None]:
# Load PFD_2017 into neo4j. The node queries run before the edge queries
# because each edge query MATCHes nodes by label.
nodes, edges = convert_graph_to_neo4j(pfd_2017)
# `tx` is the session returned by driver.session(); each query is run on it directly.
with driver.session() as tx:
  for node_prompt in nodes:
    tx.run(node_prompt)
  for edge_prompt in edges:
    tx.run(edge_prompt)

In [None]:
# Clear the neo4j database: match every node and DETACH DELETE it,
# which removes the node together with its relationships.
with driver.session() as tx:
    tx.run("""
    MATCH (n) 
    DETACH DELETE n;"""
    )

In [None]:
# Load PFD_2018 into neo4j. The node queries run before the edge queries
# because each edge query MATCHes nodes by label.
nodes, edges = convert_graph_to_neo4j(pfd_2018)
# `tx` is the session returned by driver.session(); each query is run on it directly.
with driver.session() as tx:
  for node_prompt in nodes:
    tx.run(node_prompt)
  for edge_prompt in edges:
    tx.run(edge_prompt)