In [1]:
from dotenv import load_dotenv
# Called before the remaining imports and setup run.
# NOTE(review): presumably this populates the environment from a .env file so
# that the PROJECT_ID / REGION lookups below (and any API credentials the model
# clients need) are available — confirm against the project's .env template.
load_dotenv()

import logging
import sys
# Timestamp layout for log records: year-month-day hour:minute:second.
# The day field must be %d. The earlier "%y" directive repeated the two-digit
# year in the day position, so every record in 2024 was stamped "2024-MM-24"
# regardless of the actual day of the month.
date_strftime_format = "%Y-%m-%d %H:%M:%S"

# Root logger at DEBUG so library-level request/query logs appear in the
# notebook output; each record is "<timestamp> <message>".
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
    datefmt=date_strftime_format,
)

from falkordb_gemini_kg.models.openai import (
  OpenAiGenerativeModel
)
from falkordb_gemini_kg.models.gemini import GeminiGenerativeModel
from falkordb_gemini_kg import KnowledgeGraph, Ontology, KnowledgeGraphModelConfig
from falkordb_gemini_kg.classes.source import Source, HTML
import vertexai
import os
import json
from falkordb import FalkorDB

# Set up the Vertex AI SDK with the project and region named in the environment.
vertexai.init(
    project=os.getenv("PROJECT_ID"),
    location=os.getenv("REGION"),
)

# Generative model shared by the ontology-extraction and knowledge-graph cells.
model = OpenAiGenerativeModel("gpt-4o")

In [2]:
# Input documents for this run: a single plain-text file.
sources = [Source("./data/raw/animals.txt")]
# src_files = "./data/raw"
# # For each file in the source directory, create a new Source object
# for file in os.listdir(src_files):
#     print(os.path.join(src_files, file))
#     sources.append(Source(os.path.join(src_files, file)))

# urls_files = "./data/policies"

# for file in os.listdir(urls_files):
#     print(os.path.join(urls_files, file))
#     sources.append(HTML(os.path.join(urls_files, file)))

In [3]:
# Free-text scope hint passed to ontology extraction alongside the sources.
boundaries = """
  The ontology should be pertinent only to the information about animals you can bring on board.
"""

# Derive the ontology from the source documents using the configured model.
ontology = Ontology.from_sources(sources, boundaries, model)

# Store the ontology in its own FalkorDB graph.
falkordb = FalkorDB()
ontology.save_to_graph(falkordb.select_graph("qatar_qa_ontology"))

# Save to JSON. Create the target directory first so a fresh checkout does not
# fail with FileNotFoundError, and write UTF-8 explicitly so the file matches
# the later cell that reads it back with encoding="utf-8".
os.makedirs("ontologies", exist_ok=True)
with open("ontologies/qatar_qa_ontology.json", "w", encoding="utf-8") as f:
    json.dump(ontology.to_json(), f, indent=2)

2024-07-24 11:06:38 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 11:06:38 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 11:06:38 Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '\n## 1. Overview\n"\nYou are a top-tier algorithm designed for extracting ontologies in structured formats to build a knowledge graph from raw texts.\nCapture as many entities, relationships, and attributes information from the text as possible. \n- **Entities** represent entities and concepts. Must have at least one unique attribute.\n- **Relations** represent relationships between entities and concepts.\nThe aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.\nUse the `attributes` field to capture additional information about entit

In [10]:
# Reload the ontology from its JSON file on disk.
with open("ontologies/qatar_qa_ontology.json", mode="r", encoding="utf-8") as ontology_file:
    ontology = Ontology.from_json(json.load(ontology_file))

# Write the reloaded ontology into its FalkorDB graph.
falkordb = FalkorDB()
ontology_graph = falkordb.select_graph("qatar_qa_ontology")
ontology.save_to_graph(ontology_graph)

# Build the knowledge graph from the shared model and the loaded ontology.
kg = KnowledgeGraph(
    name="qatar_qa",
    model_config=KnowledgeGraphModelConfig.with_model(model),
    ontology=ontology,
)

2024-07-24 11:10:28 Query: MERGE (n:Airline {name: "string!*"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:Pet {type: "string!*", travel_method: "string", health_status: "string", route: "string", max_per_cabin: "string", max_per_passenger: "string"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:HealthCertificate {type: "string!*", signed_by: "string*", fit_to_fly: "boolean*"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:PetContainer {type: "string!*", size: "string", ventilation: "string", design: "string", strength: "string"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:Nationality {country: "string!*"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:Document {type: "string!*", source: "string", required: "boolean"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:ServiceDog {name: "string!*", route: "string", cabin_free_of_charge: "boolean"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:Country {name: "string!*"}) RETURN n
2024-07-24 11:10:28 Query: MERGE (n:TransferCare {details: "s

In [11]:
# Extra guidance forwarded to the extraction step together with the sources.
extraction_instructions = """
Extract only the information related to the airline's policies about animals.
Try to be the most detailed when filling up the attributes.
"""

kg.process_sources(sources, instructions=extraction_instructions)

2024-07-24 11:10:30 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 11:10:30 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 11:10:30 Processing 1 documents
2024-07-24 11:10:30 Processing task: extract_data_step_efb50aa5-d93e-473b-898b-5b1876a35207
2024-07-24 11:10:30 Processing task: extract_data_step_efb50aa5-d93e-473b-898b-5b1876a35207
2024-07-24 11:10:30 User message:  Extract all possible entities and relations from the text below. Use the ontology provided in the system prompt. Assign textual IDs whenever required. Use double quotes for string values. It's imperative that string values are properly escaped. All formats should be consistent, for example, dates should be in the format "YYYY-MM-DD". If needed, add the correct spacing for text fields, where the text is not properly formatted.  User instructions:   Extract only the information related 

In [12]:
# Sample questions put to the knowledge graph, asked in order.
questions = [
    "Can I take my dog with me on board?",
    "How many falcons can I take with me on board?",
    "How does pet care work during transfer?",
]

# Ask one question at a time and print its answer before moving to the next.
for question in questions:
    answer = kg.ask(question)
    print(answer)

2024-07-24 11:10:48 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 11:10:48 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 11:10:48 Cypher Prompt: 
Using the ontology provided, generate an OpenCypher statement to query the graph database returning all relevant entities, relationships, and attributes to answer the question below:
If you cannot generate a OpenCypher statement for any reason, return an empty string.
Respect the order of the relationships, the arrows should always point from the "source" to the "target".

Question: Can I take my dog with me on board?

2024-07-24 11:10:48 Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '\nTask: Generate OpenCypher statement to query a graph database.\n\nInstructions:\nUse only the provided entities, relationships types 

I don't have the specific information about whether you can take your dog on board.


2024-07-24 11:10:53 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 11 Jul 2024 08:10:53 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'1067'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'21998433'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'4ms'), (b'x-request-id', b'req_85670c0d9aab18c4a97a031a7d573ce7'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=M56LAZmvuZJeKYxZCVDn9G70GD1OyEO5449bdRCqWGI-1720685453-1.0.1.1-nA3rkDa2WFT9sOFRiQVMcYW6NweVQPxYc_MPnhVa0zCoizvhxAJZNElYxX.gTLoFgHKMEgeKUmsx5UkjC6WfTA; path=/; expires=Thu, 11-Jul-24 08:40:5

I don't know the answer.


2024-07-24 11:12:35 Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '\nTask: Generate OpenCypher statement to query a graph database.\n\nInstructions:\nUse only the provided entities, relationships types and properties in the ontology.\nThe output must be only a valid OpenCypher statement.\nRespect the order of the relationships, the arrows should always point from the "start" to the "end".\nRespect the types of entities of every relationship, according to the ontology.\nThe OpenCypher statement must return all the relevant entities, not just the attributes requested.\nThe output of the OpenCypher statement will be passed to another model to answer the question, hence, make sure the OpenCypher statement returns all relevant entities, relationships, and attributes.\nIf the answer required multiple entities, return all the entities, relations, relationships, and their attributes.\nIf you cannot genera

Pets will be cared for, walked, and monitored if the transfer in Doha exceeds 3 hours.
