# Testing Queries

Note: The imports below work for now only by luck: they rely on relative imports, which will cause errors at some point.

## IMPORTS

In [2]:
import os
import sys
import random
import requests
import argparse
from typing import List, Set

import numpy as np
import pandas as pd

In [3]:
from db.yagodb import YagoDB
from db.constants.main import YAGO_ALL_ENTITY_COUNT, YAGO_FACTS_ENTITY_COUNT
from db.functions.entity import get_random_entities_query

In [4]:
from utils.constants import YAGO_ENTITY_STORE_DB_PATH, YAGO_PREFIXES_PATH, YAGO_ENDPOINT_URL
from utils.functions import get_prefixes, get_url_from_prefix_and_id, get_triples_query, \
    get_triples_multiple_subjects_query, query_kg

### CONSTANTS

In [None]:
# Maps each SPARQL variable name (subject / predicate / object) to the name of
# the DataFrame column that holds its values. Both sides are currently equal.
SPARQL_COLUMNS_DICT = dict(
    subject="subject",
    predicate="predicate",
    object="object",
)

### FUNCTIONS

In [6]:
def random_walk(self, depth: int = 3) -> List[str]:
    """Perform a random walk over the YAGO knowledge graph.

    The walk starts from an entity returned by `query_random_entities` and
    follows at most `depth` hops. The current walk is printed before each hop.

    NOTE(review): `self` is never used in the body. `query_random_entities`,
    `PREFIXES` and `query_triple` are not defined or imported in the visible
    cells of this notebook, and `yago_db` is a module-level global; confirm
    they exist before calling this function.

    Args:
    - depth: Maximum number of hops to follow

    Returns:
    - A list beginning with the start entity URL, followed by the predicate
      value and object value of every hop that was taken
    """
    sampled_rows = query_random_entities(yago_db)
    start_url = get_url_from_prefix_and_id(PREFIXES, sampled_rows[0][0])

    walk = [start_url]
    for _hop in range(depth):
        print(walk)
        current = walk[-1]
        triple = query_triple(YAGO_ENDPOINT_URL, f"<{current}>")
        if triple is None:
            # No triple came back for the current element: end the walk early.
            return walk
        walk.extend((triple["predicate"]["value"], triple["object"]["value"]))
    return walk

In [7]:
def get_triples_from_response(response: dict, *,
    sparql_columns_dict: dict = SPARQL_COLUMNS_DICT) -> pd.DataFrame:
    """
    Extracts triples from the response of a SPARQL query.

    Args:
    - response: Parsed query result. It must provide
      response["results"]["bindings"], an iterable of rows where each row maps
      a variable name to a dict holding a "value" entry.
    - sparql_columns_dict: Maps each variable name found in the rows to the
      name of the DataFrame column that receives its value.

    Returns:
    - A DataFrame with one row per binding. When there are no bindings, the
      frame is empty but still carries the columns named in
      sparql_columns_dict, so callers can select those columns without a
      KeyError.

    Raises:
    - KeyError: If the response lacks the expected structure, or a row uses a
      variable name that is missing from sparql_columns_dict.
    """
    triples = [
        {sparql_columns_dict[variable]: term["value"] for variable, term in row.items()}
        for row in response["results"]["bindings"]
    ]
    if not triples:
        # pd.DataFrame([]) would have no columns at all; keep the schema stable
        # for empty results (dict.fromkeys drops duplicate column names while
        # preserving their order).
        column_names = list(dict.fromkeys(sparql_columns_dict.values()))
        return pd.DataFrame(columns=column_names)
    return pd.DataFrame(triples)

In [70]:
def sample_triple_for_entity_as_list(triples_df: pd.DataFrame, entity: str, *,
    sparql_columns_dict: dict = SPARQL_COLUMNS_DICT) -> List[str]:
    """
    Picks one random (predicate, object) pair whose subject is `entity`.

    Args:
    - triples_df: DataFrame of triples containing the subject, predicate and
      object columns named by sparql_columns_dict.
    - entity: Subject value to look up; may be None.
    - sparql_columns_dict: Maps "subject", "predicate" and "object" to the
      corresponding column names of triples_df.

    Returns:
    - [predicate, object] of one randomly sampled matching row, or
      [None, None] when `entity` is None or no row has it as subject.
    """
    if entity is None:
        return [None, None]

    subject_col = sparql_columns_dict["subject"]
    is_subject = triples_df[subject_col] == entity

    predicate_col = sparql_columns_dict["predicate"]
    object_col = sparql_columns_dict["object"]
    candidates = triples_df[is_subject][[predicate_col, object_col]]
    if candidates.empty:
        return [None, None]

    # `sample` draws without replacement by default.
    picked = candidates.sample(n=1).iloc[0]
    return [picked[predicate_col], picked[object_col]]

In [9]:
# Instantiate the YagoDB wrapper with the entity-store path constant. The
# resulting `yago_db` global is queried below and is also read by `random_walk`.
yago_db = YagoDB(YAGO_ENTITY_STORE_DB_PATH)

In [10]:
# Build a query requesting 10 random entities and run it against `yago_db`.
query1 = get_random_entities_query(num_of_entities=10)
entities = yago_db.query(query1)
# Keep only the second field of each returned row, as a string, in the
# "entity0" column (the start node of each walk). In the run recorded in this
# notebook those values are YAGO resource URLs.
entities_df = pd.DataFrame([f"{entity[1]}" for entity in entities], columns=["entity0"])

In [11]:
# Build a single query covering all sampled entities at once. Each entity URL
# is wrapped in angle brackets so it is passed as an IRI; the printed query
# shows them listed in a VALUES clause for ?subject.
query2 = get_triples_multiple_subjects_query(
    entities=[f"<{entity}>" for entity in entities_df["entity0"].tolist()], 
    columns_dict=SPARQL_COLUMNS_DICT
)
print(query2)


    SELECT ?subject ?predicate ?object WHERE {
        VALUES ?subject { <http://yago-knowledge.org/resource/Francis_Fenwick_Pearson_Q76229521> <http://yago-knowledge.org/resource/Vladimir_Plekunov_Q4364816> <http://yago-knowledge.org/resource/Lorenz_Banfi_Q13100915> <http://yago-knowledge.org/resource/John_Warr> <http://yago-knowledge.org/resource/Fakulta_Mechaniky_A_Matematiky_Novosibirské_Státní_Univerzity_Q4292118> <http://yago-knowledge.org/resource/Nyadzati_Q31419332> <http://yago-knowledge.org/resource/Scott_Brennan__u0028_rower_u0029_> <http://yago-knowledge.org/resource/Li_Qian_Q64514430> <http://yago-knowledge.org/resource/Protestant_Church__U0028_Kühnhausen_U0029__Q63197068> <http://yago-knowledge.org/resource/Trolleybus_Transport_In_Opava_Q9361770> }
        ?subject ?predicate ?object
        FILTER isIRI(?object)
    }
    


In [12]:
# Run the query against the YAGO endpoint. The printed response is a dict with
# a "head" section (variable names) and "results" -> "bindings" (one entry per
# matching triple).
# NOTE(review): presumably `query_kg` performs a network request - confirm.
response = query_kg(YAGO_ENDPOINT_URL, query2)
print(response)

{'head': {'vars': ['subject', 'predicate', 'object']}, 'results': {'bindings': [{'subject': {'type': 'uri', 'value': 'http://yago-knowledge.org/resource/Vladimir_Plekunov_Q4364816'}, 'predicate': {'type': 'uri', 'value': 'http://schema.org/award'}, 'object': {'type': 'uri', 'value': 'http://yago-knowledge.org/resource/Order_Of_The_Patriotic_War_2Nd_Class_Q18200478'}}, {'subject': {'type': 'uri', 'value': 'http://yago-knowledge.org/resource/Vladimir_Plekunov_Q4364816'}, 'predicate': {'type': 'uri', 'value': 'http://schema.org/award'}, 'object': {'type': 'uri', 'value': 'http://yago-knowledge.org/resource/Order_of_the_Badge_of_Honour'}}, {'subject': {'type': 'uri', 'value': 'http://yago-knowledge.org/resource/Vladimir_Plekunov_Q4364816'}, 'predicate': {'type': 'uri', 'value': 'http://schema.org/award'}, 'object': {'type': 'uri', 'value': 'http://yago-knowledge.org/resource/Medal__u0022_For_the_Defence_of_Moscow_u0022_'}}, {'subject': {'type': 'uri', 'value': 'http://yago-knowledge.org/re

In [13]:
# Flatten the response bindings into a DataFrame of triples, using the default
# column mapping.
triples = get_triples_from_response(response)

In [14]:
# Preview the first rows of the extracted triples.
triples.head()

Unnamed: 0,subject,predicate,object
0,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/award,http://yago-knowledge.org/resource/Order_Of_Th...
1,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/award,http://yago-knowledge.org/resource/Order_of_th...
2,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/award,http://yago-knowledge.org/resource/Medal__u002...
3,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/gender,http://yago-knowledge.org/resource/Male_gender
4,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/knowsLanguage,http://yago-knowledge.org/resource/Russian_lan...


In [15]:
# Draw one random triple per subject. Subjects with no triples in `triples` do
# not appear in the result.
triples.groupby(SPARQL_COLUMNS_DICT["subject"]).sample(1)

Unnamed: 0,subject,predicate,object
23,http://yago-knowledge.org/resource/Francis_Fen...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://schema.org/Person
36,http://yago-knowledge.org/resource/John_Warr,http://schema.org/alumniOf,http://yago-knowledge.org/resource/Emmanuel_Co...
17,http://yago-knowledge.org/resource/Li_Qian_Q64...,http://schema.org/worksFor,http://yago-knowledge.org/resource/Zhangjiagan...
10,http://yago-knowledge.org/resource/Lorenz_Banf...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q13100915
12,http://yago-knowledge.org/resource/Nyadzati_Q3...,http://schema.org/location,http://yago-knowledge.org/resource/Mashonaland...
35,http://yago-knowledge.org/resource/Scott_Brenn...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q926208
28,http://yago-knowledge.org/resource/Trolleybus_...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q9361770
1,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/award,http://yago-knowledge.org/resource/Order_of_th...


In [16]:
# Show every triple whose subject is the third sampled entity (positional row 2).
triples[triples[SPARQL_COLUMNS_DICT["subject"]] == entities_df.iloc[2]["entity0"]]

Unnamed: 0,subject,predicate,object
8,http://yago-knowledge.org/resource/Lorenz_Banf...,http://schema.org/gender,http://yago-knowledge.org/resource/Male_gender
9,http://yago-knowledge.org/resource/Lorenz_Banf...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Politician
10,http://yago-knowledge.org/resource/Lorenz_Banf...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q13100915


In [74]:
# First hop: for each start entity, sample one (predicate, object) pair from
# `triples`. result_type="expand" spreads the returned two-element list into
# columns 0 and 1, which are then renamed to "predicate1" and "entity1".
# Entities without any matching triple get None in both columns.
entities_hop_1 = entities_df.apply(lambda row: sample_triple_for_entity_as_list(triples_df=triples, entity=row["entity0"]), 
    axis=1, result_type="expand").rename(columns={0: "predicate1", 1: "entity1"})

In [76]:
# Attach the first-hop predicate and object columns to the entities table.
entities_df[["predicate1", "entity1"]] = entities_hop_1

In [77]:
# Display the start entities together with their sampled first-hop predicate
# and object.
entities_df

Unnamed: 0,entity0,predicate1,entity1
0,http://yago-knowledge.org/resource/Francis_Fen...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q76229521
1,http://yago-knowledge.org/resource/Vladimir_Pl...,http://schema.org/award,http://yago-knowledge.org/resource/Medal__u002...
2,http://yago-knowledge.org/resource/Lorenz_Banf...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q13100915
3,http://yago-knowledge.org/resource/John_Warr,http://schema.org/gender,http://yago-knowledge.org/resource/Male_gender
4,http://yago-knowledge.org/resource/Fakulta_Mec...,,
5,http://yago-knowledge.org/resource/Nyadzati_Q3...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q31419332
6,http://yago-knowledge.org/resource/Scott_Brenn...,http://schema.org/birthPlace,http://yago-knowledge.org/resource/Hobart
7,http://yago-knowledge.org/resource/Li_Qian_Q64...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q64514430
8,http://yago-knowledge.org/resource/Protestant_...,,
9,http://yago-knowledge.org/resource/Trolleybus_...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Trolleybus_...
