In [1]:
import knowledge_graph_pipeline as pipeline

# Build Knowledge Graph

In [2]:
# SQL dump whose schema the knowledge graph is built from (relative path).
sql_file = "Dump20250409.sql"
# Parse the dump. As the displayed result shows, it maps each table name to its
# 'columns' (name / type / constraints), 'primary_keys' and 'foreign_keys'.
parsed_schema = pipeline.parse_schema(sql_file)
parsed_schema

{'attribute': {'columns': [{'name': 'id',
    'type': 'BIGINT',
    'constraints': ['NOT NULL', 'AUTO_INCREMENT']},
   {'name': 'object_id', 'type': 'BIGINT', 'constraints': ['NOT NULL']},
   {'name': 'artist', 'type': 'VARCHAR(100)', 'constraints': ['DEFAULT NULL']},
   {'name': 'art_style',
    'type': 'VARCHAR(100)',
    'constraints': ['DEFAULT NULL']},
   {'name': 'material',
    'type': 'VARCHAR(100)',
    'constraints': ['DEFAULT NULL']},
   {'name': 'created_at',
    'type': 'DATETIME',
    'constraints': ['NOT NULL', 'DEFAULT CURRENT_TIMESTAMP()']},
   {'name': 'updated_at',
    'type': 'DATETIME',
    'constraints': ['DEFAULT NULL', 'ON UPDATE CURRENT_TIMESTAMP()']}],
  'primary_keys': ['id'],
  'foreign_keys': [{'name': 'fk_museum_object_attribute',
    'foreign_key': 'object_id',
    'reference_table': 'museum_object',
    'reference_column': 'id'}]},
 'entrance': {'columns': [{'name': 'id',
    'type': 'BIGINT',
    'constraints': ['NOT NULL', 'AUTO_INCREMENT']},
   {'name

In [4]:
# Build the knowledge graph from the parsed schema; the cells below visualize and save it.
knowledge_graph = pipeline.build_knowledge_graph(parsed_schema)

In [5]:
# Produce an interactive view of the schema graph; the target file name is passed
# explicitly and is echoed back in the cell output.
pipeline.visualize_kg_interactive(knowledge_graph, filename='kg.html')

kg.html


# Save Knowledge Graph

In [6]:
# Persist the schema graph so it can be reloaded later without re-parsing the dump.
# NOTE(review): the name is passed without a file extension — confirm how the
# pipeline derives the actual file name on disk.
pipeline.save_kg_graphml(knowledge_graph, 'sql_knowledge_graph')

# Load Knowledge Graph

In [2]:
# Reload the graph stored under the same name that save_kg_graphml was given.
# NOTE(review): the execution counter restarts at this cell, so it appears to have
# been run in a fresh kernel — the `pipeline` import cell must be executed first.
loaded_knowledge_graph = pipeline.load_kg_graphml('sql_knowledge_graph')

# Embed Query To KG

In [3]:
# Example query: objects originating from Italy together with their material and
# art style. Written clause by clause; the adjacent literals are concatenated into
# a single line of SQL ending in a newline.
sql_query = (
    "SELECT mo.id AS object_id, mo.title, a.material, a.art_style "
    "FROM museum_object mo "
    "JOIN location l ON mo.origin_location_id = l.id "
    "JOIN attribute a ON mo.id = a.object_id "
    "WHERE l.country = 'Italy';\n"
)

In [4]:
# Break the SQL text into structured components and display them; the result
# lists the query type, tables, qualified columns, joins and conditions.
parsed_query = pipeline.parse_sql(sql_query)
parsed_query

QueryComponents(raw_query="SELECT mo.id AS object_id, mo.title, a.material, a.art_style FROM museum_object mo JOIN location l ON mo.origin_location_id = l.id JOIN attribute a ON mo.id = a.object_id WHERE l.country = 'Italy';\n", query_type='SELECT', tables={'location', 'attribute', 'museum_object'}, columns={'location.country', 'attribute.art_style', 'attribute.material', 'location.id', 'attribute.object_id', 'museum_object.id', 'museum_object.origin_location_id', 'museum_object.title'}, joins=[{'source': 'museum_object', 'target': 'location', 'type': 'JOIN', 'conditions': [{'left': 'origin_location_id', 'right': 'id', 'operator': '='}]}, {'source': 'museum_object', 'target': 'attribute', 'type': 'JOIN', 'conditions': [{'left': 'id', 'right': 'object_id', 'operator': '='}]}], conditions=[{'left': 'l.country', 'operator': 'eq', 'right': "'Italy'"}], aggregations=set(), subqueries=set(), group_by=[], order_by=[], limit=None, having=[], ctes={})

In [5]:
# Add the parsed query to the loaded graph. The call returns the updated graph
# together with an identifier for the query (the identifier is also printed).
# NOTE(review): later cells find the query in `loaded_knowledge_graph` as well,
# which implies the graph passed in is modified in place — confirm in the pipeline.
knowledge_graph_update, query_id = pipeline.update_kg_with_query(loaded_knowledge_graph, parsed_query)

query_7461764137375118208


In [6]:
# Display the identifier under which the query was registered in the graph.
query_id

'query_7461764137375118208'

In [7]:
# Visualize the query identified by `query_id` within the updated graph.
# NOTE(review): the reported output file name repeats the "query_" prefix
# (query_query_<n>.html) — presumably the pipeline prefixes an id that already
# starts with "query_"; verify whether that is intended.
pipeline.visualize_query(knowledge_graph_update, query_id)

query_query_7461764137375118208.html


# Extract Data From KG Query

In [8]:
# Tables referenced by the query. Read them from the graph returned by
# update_kg_with_query (the same graph visualize_query was given) instead of
# relying on `loaded_knowledge_graph` having been modified in place.
pipeline.extract_query_labels(knowledge_graph_update, query_id)['tables']

[{'id': 'table_location', 'label': 'location'},
 {'id': 'table_attribute', 'label': 'attribute'},
 {'id': 'table_museum_object', 'label': 'museum_object'}]

In [9]:
# Columns referenced by the query, each with its owning table. Read them from the
# graph returned by update_kg_with_query (the same graph visualize_query was
# given) instead of relying on `loaded_knowledge_graph` having been modified in place.
pipeline.extract_query_labels(knowledge_graph_update, query_id)['columns']

[{'id': 'column_museum_object.title',
  'label': 'title',
  'table': 'museum_object'},
 {'id': 'column_museum_object.id', 'label': 'id', 'table': 'museum_object'},
 {'id': 'column_attribute.art_style',
  'label': 'art_style',
  'table': 'attribute'},
 {'id': 'column_location.country', 'label': 'country', 'table': 'location'},
 {'id': 'column_attribute.object_id',
  'label': 'object_id',
  'table': 'attribute'},
 {'id': 'column_location.id', 'label': 'id', 'table': 'location'},
 {'id': 'column_attribute.material',
  'label': 'material',
  'table': 'attribute'}]