# Testing Queries

Note: For now, we are lucky that the imports work, because we are using relative imports, and at one point, it will cause errors.

## IMPORTS

In [3]:
import os
import sys
import random
import requests
import argparse
from typing import List, Set

import numpy as np
import pandas as pd

In [4]:
from db.yagodb import YagoDB
from db.constants.main import YAGO_ALL_ENTITY_COUNT, YAGO_FACTS_ENTITY_COUNT
from db.functions.entity import get_random_entities_query

In [5]:
from utils.constants import YAGO_ENTITY_STORE_DB_PATH, YAGO_PREFIXES_PATH, YAGO_ENDPOINT_URL
from utils.functions import get_prefixes, get_url_from_prefix_and_id, get_triples_query, \
    get_triples_multiple_subjects_query, query_kg

### CONSTANTS

In [6]:
SPARQL_COLUMNS_DICT = {
    "subject": "subject",
    "predicate": "predicate",
    "object": "object"
}

### FUNCTIONS

In [7]:
def random_walk(self, depth: int = 3) -> List[str]:
    """Random walk on the YAGO knowledge graph.

    Args:
    - depth: Depth of the walk

    Returns:
    - A list of node IDs visited during the walk
    """
    random_entity = query_random_entities(yago_db)

    subject = get_url_from_prefix_and_id(PREFIXES, random_entity[0][0])

    walk = [subject]
    for _ in range(depth):
        print(walk)
        triple = query_triple(YAGO_ENDPOINT_URL, f"<{walk[-1]}>")
        if triple is None:
            break
        walk.append(triple["predicate"]["value"])
        walk.append(triple["object"]["value"])
    return walk

In [8]:
def get_triples_from_response(response: dict, *,
    sparql_columns_dict: dict = SPARQL_COLUMNS_DICT) -> pd.DataFrame:
    """
    Extracts triples from the response of a SPARQL query.
    """
    triples = []
    for row in response["results"]["bindings"]:
        triple = {}
        for key, value in row.items():
            triple[sparql_columns_dict[key]] = value["value"]
        triples.append(triple)
    return pd.DataFrame(triples)

In [9]:
def sample_triple_for_entity_as_list(triples_df: pd.DataFrame, entity: str, *, 
    sparql_columns_dict: dict = SPARQL_COLUMNS_DICT) -> List[str]:
    """
    Samples triples for a given entity.
    """
    if entity is None:
        return [None, None]
    matched_triples_df = triples_df[triples_df[sparql_columns_dict["subject"]] == entity]\
        [[sparql_columns_dict["predicate"], sparql_columns_dict["object"]]]
    if len(matched_triples_df) == 0:
        return [None, None]
    sampled_triple = matched_triples_df.sample(n=1, replace=False).iloc[0]
    return [sampled_triple[sparql_columns_dict["predicate"]], sampled_triple[sparql_columns_dict["object"]]]

In [10]:
def single_hop_multiple_entities(yago_db: YagoDB, entities_df: pd.DataFrame) -> pd.DataFrame:
    """
    Single-hop random walk on the YAGO knowledge graph.
    """
    query2 = get_triples_multiple_subjects_query(
        entities=[f"<{entity}>" for entity in entities_df["entity0"].tolist()], 
        columns_dict=SPARQL_COLUMNS_DICT
    )
    response = query_kg(YAGO_ENDPOINT_URL, query2)
    triples = get_triples_from_response(response)
    entities_hop_1 = entities_df.apply(
        lambda row: sample_triple_for_entity_as_list(triples_df=triples, entity=row["entity0"]), 
        axis=1, result_type="expand").rename(columns={0: "predicate1", 1: "entity1"})
    return entities_hop_1

In [25]:
def random_walks_multiple(yago_db: YagoDB, *, num_of_entities: int = 10, depth: int = 3) -> pd.DataFrame:
    """
    Random walks on the YAGO knowledge graph.
    """
    query1 = get_random_entities_query(num_of_entities=num_of_entities)
    entities = yago_db.query(query1)
    entities_df = pd.DataFrame([f"{entity[1]}" for entity in entities], columns=["entity0"])

    for i in range(depth - 1):
        entities_single_hop = single_hop_multiple_entities(yago_db, entities_df)
        entities_df[[f"predicate{i+1}", f"entity{i+1}"]] = entities_single_hop

    return entities_df

### Experiment Single Walks

In [12]:
yago_db = YagoDB(YAGO_ENTITY_STORE_DB_PATH)

In [26]:
random_walks = random_walks_multiple(yago_db, num_of_entities=10, depth=3)

In [27]:
random_walks

Unnamed: 0,entity0,predicate1,entity1,predicate2,entity2
0,http://yago-knowledge.org/resource/Erythema_No...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q34588049,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q34588049
1,http://yago-knowledge.org/resource/Preface_-_A...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Editorial,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q89235903
2,http://yago-knowledge.org/resource/Bactrocera_...,http://schema.org/parentTaxon,http://yago-knowledge.org/resource/Bactrocera,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://schema.org/Taxon
3,http://yago-knowledge.org/resource/Tyc_9222-23...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Star,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q75527678
4,http://yago-knowledge.org/resource/Aderonke_El...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q101044387,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Researcher
5,http://yago-knowledge.org/resource/Linn_County...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q69476064,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q69476064
6,http://yago-knowledge.org/resource/Nakaway_Ahk...,http://schema.org/location,http://yago-knowledge.org/resource/Canada,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q85787672
7,http://yago-knowledge.org/resource/Fung_Kam_To...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q9375302,http://schema.org/gender,http://yago-knowledge.org/resource/Male_gender
8,http://yago-knowledge.org/resource/Gilbert_Rid...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q110180569,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q110180569
9,http://yago-knowledge.org/resource/2019_Local_...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q86837342,http://schema.org/location,http://yago-knowledge.org/resource/Bulgaria
