In [1]:
import knowledge_graph_pipeline as pipeline

# Build Knowledge Graph

In [2]:
# Parse the SQL schema file into a nested dict. The echoed result shows one
# entry per table, each with 'columns', 'primary_keys' and 'foreign_keys'.
sql_file = "schema_link_expert.sql"
parsed_schema = pipeline.parse_schema(sql_file)
# NOTE(review): in the output below, columns carry an inline 'PRIMARY KEY'
# constraint while the table-level 'primary_keys' list is empty — confirm
# that downstream graph building does not rely on 'primary_keys'.
parsed_schema

{'Location': {'columns': [{'name': 'zip_code',
    'type': 'VARCHAR(10)',
    'constraints': ['PRIMARY KEY']},
   {'name': 'country', 'type': 'VARCHAR(100)', 'constraints': ['NOT NULL']},
   {'name': 'createdAt',
    'type': 'DATETIME',
    'constraints': ['NOT NULL', 'DEFAULT NOW()']},
   {'name': 'updatedAt', 'type': 'DATETIME', 'constraints': []}],
  'primary_keys': [],
  'foreign_keys': []},
 'User': {'columns': [{'name': 'id',
    'type': 'BIGINT',
    'constraints': ['PRIMARY KEY', 'AUTO_INCREMENT']},
   {'name': 'full_name', 'type': 'VARCHAR(100)', 'constraints': ['NOT NULL']},
   {'name': 'email',
    'type': 'VARCHAR(100)',
    'constraints': ['NOT NULL', 'UNIQUE']},
   {'name': 'phone_numer',
    'type': 'VARCHAR(20)',
    'constraints': ['NOT NULL', 'UNIQUE']},
   {'name': 'dob', 'type': 'DATE', 'constraints': ['NOT NULL']},
   {'name': 'zip_code', 'type': 'VARCHAR(10)', 'constraints': ['NOT NULL']},
   {'name': 'createdAt',
    'type': 'DATETIME',
    'constraints': ['NOT N

In [3]:
# Build the knowledge graph from the parsed schema dict.
knowledge_graph = pipeline.build_knowledge_graph(parsed_schema)

In [4]:
# Render the schema graph; 'link_kg1.html' is passed as the output filename
# and is echoed back below the cell.
pipeline.visualize_kg_interactive(knowledge_graph, filename='link_kg1.html')

link_kg1.html


# Save Knowledge Graph

In [5]:
# Persist the schema graph under the name 'sql_knowledge_graph_link'
# (presumably as GraphML, going by the helper's name — confirm whether the
# helper appends a file extension).
pipeline.save_kg_graphml(knowledge_graph, 'sql_knowledge_graph_link')

# Load Knowledge Graph

In [2]:
# Load a previously saved graph to attach queries to.
# NOTE(review): this reads 'sql_knowledge_graph', whereas the save cell in
# this notebook writes 'sql_knowledge_graph_link' — confirm which file is
# intended; on a fresh checkout the name loaded here may not exist.
loaded_knowledge_graph = pipeline.load_kg_graphml('sql_knowledge_graph')

# Embed Query To KG

In [13]:
# Sample query to embed into the graph. It defines two CTEs over
# museum JOIN entrance:
#   - monthly_ticket_sales: COUNT(e.id) per museum for entries within the
#     last month
#   - museum_capacity: (SUM(e.price) / COUNT(e.id)) * 100 per museum
# and then joins them on museum_id.
# NOTE(review): the final filter is `>= 0`; confirm that threshold is intended.
sql_query = """WITH monthly_ticket_sales AS (
SELECT m.id AS museum_id, m.name AS museum_name, COUNT(e.id) AS tickets_sold
FROM museum m 
JOIN entrance e ON m.id = e.museum_id 
WHERE e.entry_time >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY m.id ), 
museum_capacity AS ( 
SELECT m.id AS museum_id, (SUM(e.price) / COUNT(e.id)) * 100 AS estimated_capacity 
FROM museum m 
JOIN entrance e ON m.id = e.museum_id 
GROUP BY m.id ) 
SELECT mts.museum_name, mts.tickets_sold, mc.estimated_capacity 
FROM monthly_ticket_sales mts 
JOIN museum_capacity mc ON mts.museum_id = mc.museum_id 
WHERE (mts.tickets_sold / mc.estimated_capacity) >= 0;"""

In [14]:
# Parse the SQL text into a QueryComponents record; the echoed repr shows
# raw_query, query_type, tables and columns fields.
# Note from the output: the CTE names (monthly_ticket_sales, museum_capacity)
# are reported in `tables` alongside the base tables museum and entrance.
parsed_query = pipeline.parse_sql(sql_query)
parsed_query

QueryComponents(raw_query='WITH monthly_ticket_sales AS (\nSELECT m.id AS museum_id, m.name AS museum_name, COUNT(e.id) AS tickets_sold\nFROM museum m \nJOIN entrance e ON m.id = e.museum_id \nWHERE e.entry_time >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY m.id ), \nmuseum_capacity AS ( \nSELECT m.id AS museum_id, (SUM(e.price) / COUNT(e.id)) * 100 AS estimated_capacity \nFROM museum m \nJOIN entrance e ON m.id = e.museum_id \nGROUP BY m.id ) \nSELECT mts.museum_name, mts.tickets_sold, mc.estimated_capacity \nFROM monthly_ticket_sales mts \nJOIN museum_capacity mc ON mts.museum_id = mc.museum_id \nWHERE (mts.tickets_sold / mc.estimated_capacity) >= 0;', query_type='SELECT', tables={'museum', 'museum_capacity', 'monthly_ticket_sales', 'entrance'}, columns={'entrance.entry_time', 'museum.name', 'entrance.id', 'monthly_ticket_sales.tickets_sold', 'museum_capacity.museum_id', 'entrance.price', 'monthly_ticket_sales.museum_name', 'entrance.museum_id', 'monthly_ticket_sales.museum_id', 

In [15]:
# Attach the parsed query to the loaded graph; returns the updated graph and
# the id of the new query node (the id is also printed by the call).
# NOTE(review): the id looks like 'query_' + a signed hash value. If it comes
# from Python's built-in hash() on a string, it will differ between kernel
# sessions unless PYTHONHASHSEED is fixed — confirm before persisting ids.
knowledge_graph_update, query_id = pipeline.update_kg_with_query(loaded_knowledge_graph, parsed_query)

query_-2606769074305022318


In [16]:
# Echo the query node id returned by update_kg_with_query.
query_id

'query_-2606769074305022318'

In [17]:
# Render the subgraph for this query from the updated graph.
# NOTE(review): the echoed filename is 'query_query_-….html' — the 'query_'
# prefix appears twice because query_id already starts with it; confirm
# whether the helper should skip its own prefix.
pipeline.visualize_query(knowledge_graph_update, query_id)

query_query_-2606769074305022318.html


# Extract Data From KG Query

In [12]:
# List the table nodes linked to the query node `query_id`.
# Read from `knowledge_graph_update` (the graph returned by
# update_kg_with_query) rather than `loaded_knowledge_graph`: the returned
# graph is the one that holds the query node, whereas the pre-update variable
# only works if the update mutated it in place.
pipeline.extract_query_labels(knowledge_graph_update, query_id)['tables']

[{'id': 'table_User', 'label': 'User'},
 {'id': 'table_Education', 'label': 'Education'},
 {'id': 'table_User_Education', 'label': 'User_Education'}]

In [13]:
# List the column nodes linked to the query node `query_id`.
# Read from `knowledge_graph_update` (the graph returned by
# update_kg_with_query) rather than `loaded_knowledge_graph`: the returned
# graph is the one that holds the query node, whereas the pre-update variable
# only works if the update mutated it in place.
pipeline.extract_query_labels(knowledge_graph_update, query_id)['columns']

[{'id': 'column_User.id', 'label': 'id', 'table': 'User'},
 {'id': 'column_Education.id', 'label': 'id', 'table': 'Education'},
 {'id': 'column_User_Education.user_id',
  'label': 'user_id',
  'table': 'User_Education'},
 {'id': 'column_Education.university_name',
  'label': 'university_name',
  'table': 'Education'},
 {'id': 'column_User.email', 'label': 'email', 'table': 'User'},
 {'id': 'column_User.full_name', 'label': 'full_name', 'table': 'User'},
 {'id': 'column_User_Education.education_id',
  'label': 'education_id',
  'table': 'User_Education'},
 {'id': 'column_User_Education.graduation_date',
  'label': 'graduation_date',
  'table': 'User_Education'},
 {'id': 'column_User_Education.degree',
  'label': 'degree',
  'table': 'User_Education'},
 {'id': 'column_User_Education.specialization',
  'label': 'specialization',
  'table': 'User_Education'}]