In [1]:
from dotenv import load_dotenv

load_dotenv()

import logging
import sys

date_strftime_format = "%Y-%m-%y %H:%M:%S"
logging.basicConfig(
    level=logging.DEBUG, format="%(asctime)s %(message)s", datefmt=date_strftime_format
)

from falkordb_gemini_kg.models.openai import OpenAiGenerativeModel
from falkordb_gemini_kg import KnowledgeGraph, Ontology, KnowledgeGraphModelConfig
from falkordb_gemini_kg.classes.source import Source, HTML
from falkordb_gemini_kg.classes.entity import Entity
from falkordb_gemini_kg.classes.relation import Relation
from falkordb_gemini_kg.classes.attribute import Attribute, AttributeType
import os
import json
from falkordb import FalkorDB

model = OpenAiGenerativeModel("gpt-4o")

In [6]:
sources = [Source("./data/cleaned_qr_baggage_v7.jsonl")]

In [3]:
ontology = Ontology()

entities = [
    Entity("Region", [Attribute("name", AttributeType.STRING, True, True)]),
    Entity("Country", [Attribute("name", AttributeType.STRING, True, True)]),
    Entity("City", [Attribute("name", AttributeType.STRING, True, True)]),
    Entity("Airport", [Attribute("name", AttributeType.STRING, True, True)]),
]

relations = [
    Relation("IN_COUNTRY", "City", "Country"),
    Relation("IN_REGION", "Country", "Region"),
    Relation("IN_CITY", "Airport", "City"),
    Relation(
        "HAS_ROUTE",
        "Airport",
        "Airport",
        [
            Attribute("fare_price", AttributeType.NUMBER, False, False),
            Attribute("fare_price", AttributeType.NUMBER, False, False),
        ],
    ),
]

for entity in entities:
    ontology.add_entity(entity)

for relation in relations:
    ontology.add_relation(relation)

In [4]:
falkordb = FalkorDB()
ontology.save_to_graph(falkordb.select_graph("qatar_qa_destinations_ontology"))

kg = KnowledgeGraph(
    name="qatar_qa_destinations",
    model_config=KnowledgeGraphModelConfig.with_model(model),
    ontology=ontology,
)

2024-07-24 11:41:16 Query: MERGE (n:Region {name: "string!*"}) RETURN n


2024-07-24 11:41:16 Query: MERGE (n:Country {name: "string!*"}) RETURN n
2024-07-24 11:41:16 Query: MERGE (n:City {name: "string!*"}) RETURN n
2024-07-24 11:41:16 Query: MERGE (n:Airport {name: "string!*"}) RETURN n
2024-07-24 11:41:16 Query: MATCH (s:City) MATCH (t:Country) MERGE (s)-[r:IN_COUNTRY {}]->(t) RETURN r
2024-07-24 11:41:16 Query: MATCH (s:Country) MATCH (t:Region) MERGE (s)-[r:IN_REGION {}]->(t) RETURN r
2024-07-24 11:41:16 Query: MATCH (s:Airport) MATCH (t:City) MERGE (s)-[r:IN_CITY {}]->(t) RETURN r
2024-07-24 11:41:16 Query: MATCH (s:Airport) MATCH (t:Airport) MERGE (s)-[r:HAS_ROUTE {fare_price: "number", fare_price: "number"}]->(t) RETURN r


In [8]:
kg.process_sources(sources, instructions="""
Extract all the information about Qatar Airways routes from the sources provided.
The generated knowledge graph should be able to answer questions such as "Does Qatar Airways fly to Melbourne with a stopover in Doha?"
""")

2024-07-24 10:06:58 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 10:06:58 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 10:06:58 Processing 143 documents
2024-07-24 10:06:58 Processing task: extract_data_step_2b90481a-5dbd-43fe-8e4f-fa09d097ecb8
2024-07-24 10:06:58 Processing task: extract_data_step_4cac7101-5c58-452f-8f1e-34dfc81cea1c
2024-07-24 10:06:58 Processing task: extract_data_step_5d46be32-9f40-4376-8423-3b0607bd3d05
2024-07-24 10:06:58 Processing task: extract_data_step_c94b51c3-c390-4213-b755-f274cd23e01e
2024-07-24 10:06:58 Processing task: extract_data_step_2b90481a-5dbd-43fe-8e4f-fa09d097ecb8
2024-07-24 10:06:58 Processing task: extract_data_step_8246a947-655c-4af2-bf75-e3c602d90904
2024-07-24 10:06:58 Processing task: extract_data_step_9c65b655-c413-44aa-8c15-bfd5806b742a
2024-07-24 10:06:58 Processing task: extract_data_step_cab9dcb

Entity with label Airline not found in ontology
Entity with label Aircraft not found in ontology
Entity with label Aircraft not found in ontology
Entity with label Aircraft not found in ontology


2024-07-24 10:08:02 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 16 Jul 2024 07:08:02 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'3054'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9998'), (b'x-ratelimit-remaining-tokens', b'21984863'), (b'x-ratelimit-reset-requests', b'11ms'), (b'x-ratelimit-reset-tokens', b'41ms'), (b'x-request-id', b'req_8c5f91b45bed0bb9e8150fa85e3e0758'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8a40288e7f7083ac-MXP'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
2024-07-24 10:08:02 HTTP R

Entity with label Airline not found in ontology


2024-07-24 10:08:19 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 16 Jul 2024 07:08:19 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'19925'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'21996020'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'10ms'), (b'x-request-id', b'req_7fce3ae80f161fae90b01b18ba8f012d'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8a40288ecf560e1a-MXP'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
2024-07-24 10:08:19 HTTP R

Entity with label Airline not found in ontology
Entity with label Route not found in ontology


2024-07-24 10:15:01 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 16 Jul 2024 07:15:01 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'2502'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9998'), (b'x-ratelimit-remaining-tokens', b'21989035'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'29ms'), (b'x-request-id', b'req_fde5f41d167d5b485b25da0dfa7d1993'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8a4032cfbb980e45-MXP'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
2024-07-24 10:15:01 HTTP Re

Entity with label Airline not found in ontology
Relations with label OPERATES not found in ontology
Relations with label OPERATES not found in ontology


2024-07-24 10:15:04 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 16 Jul 2024 07:15:04 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'4806'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'21995523'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'12ms'), (b'x-request-id', b'req_aed636c35d87165fccbbfde7f1845c17'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8a4032cfa8e4bae1-MXP'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
2024-07-24 10:15:04 HTTP Re

Relations with label IN_REGIONAL not found in ontology
Relations with label IN_REGIONAL not found in ontology


2024-07-24 10:15:08 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 16 Jul 2024 07:15:08 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'8709'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9995'), (b'x-ratelimit-remaining-tokens', b'21986320'), (b'x-ratelimit-reset-requests', b'24ms'), (b'x-ratelimit-reset-tokens', b'37ms'), (b'x-request-id', b'req_168e7afd533ec52f51bd5470262f96a4'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8a4032cfb9ba59d7-MXP'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
2024-07-24 10:15:08 HTTP R

Entity with label Airline not found in ontology


2024-07-24 10:16:03 receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 16 Jul 2024 07:16:03 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'falkordb'), (b'openai-processing-ms', b'4440'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'22000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'21995109'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'13ms'), (b'x-request-id', b'req_b334a06f93d85e5effc7918d4bfc012d'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8a4034467aaf0dc6-MXP'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400')])
2024-07-24 10:16:03 HTTP Re

In [9]:
kg.ask("Do you fly to Melbourne with a stopover in Doha?") 

2024-07-24 10:22:02 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 10:22:02 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 10:22:02 Cypher Prompt: 
Using the ontology provided, generate an OpenCypher statement to query the graph database returning all relevant entities, relationships, and attributes to answer the question below:
If you cannot generate a OpenCypher statement for any reason, return an empty string.
Respect the order of the relationships, the arrows should always point from the "source" to the "target".

Question: Do you fly to Melbourne with a stopover in Doha?

2024-07-24 10:22:02 Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '\nTask: Generate OpenCypher statement to query a graph database.\n\nInstructions:\nUse only the provided entities, relatio

'Yes, you can fly to Melbourne with a stopover in Doha.'

In [6]:
kg.ask("Is there a flight from Tel Aviv to Doha?")

2024-07-24 13:11:18 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 13:11:18 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 13:11:18 Cypher Prompt: 
Using the ontology provided, generate an OpenCypher statement to query the graph database returning all relevant entities, relationships, and attributes to answer the question below:
If you cannot generate a OpenCypher statement for any reason, return an empty string.
Respect the order of the relationships, the arrows should always point from the "source" to the "target".

Question: Is there a flight from Tel Aviv to Doha?

2024-07-24 13:11:18 Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '\nTask: Generate OpenCypher statement to query a graph database.\n\nInstructions:\nUse only the provided entities, relationships t

'The provided context does not include specific information about existing data. Hence we cannot determine if there is a flight from Tel Aviv to Doha based on the given Cypher query alone.'

In [11]:
kg.ask("Do you fly to Melbourne with a stopover in Doha and Bangkok?") 

2024-07-24 10:36:00 load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-24 10:36:00 load_verify_locations cafile='/Users/davidzimberknopf/Documents/Apps/falkordb-gemini-kg/.venv/lib/python3.11/site-packages/certifi/cacert.pem'
2024-07-24 10:36:00 Cypher Prompt: 
Using the ontology provided, generate an OpenCypher statement to query the graph database returning all relevant entities, relationships, and attributes to answer the question below:
If you cannot generate a OpenCypher statement for any reason, return an empty string.
Respect the order of the relationships, the arrows should always point from the "source" to the "target".

Question: Do you fly to Melbourne with a stopover in Doha and Bangkok?

2024-07-24 10:36:00 Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': '\nTask: Generate OpenCypher statement to query a graph database.\n\nInstructions:\nUse only the provided entit

'Yes, there is a flight to Melbourne with stopovers in Doha and Bangkok.'