# Testing Queries

Note: For now, we are lucky that the imports work, because we are using relative imports, and at one point, it will cause errors.

## IMPORTS

In [1]:
import os
import sys
import random
import requests
import argparse
from typing import List, Set

import numpy as np
import pandas as pd

In [2]:
from db.yagodb import YagoDB
from db.constants.main import YAGO_ALL_ENTITY_COUNT, YAGO_FACTS_ENTITY_COUNT
from db.functions.entity import get_random_entities_query

In [None]:
from utils.random_walk

In [9]:
from utils.constants import YAGO_ENTITY_STORE_DB_PATH, YAGO_PREFIXES_PATH, YAGO_ENDPOINT_URL
from utils.prefix import get_prefixes, get_url_from_prefix_and_id
from utils.random_walk import random_walks_multiple, single_hop_multiple_entities, get_triples_from_response, \
    sample_triple_for_entity_as_list

ModuleNotFoundError: No module named 'constants'

### CONSTANTS

In [4]:
SPARQL_COLUMNS_DICT = {
    "subject": "subject",
    "predicate": "predicate",
    "object": "object"
}

### FUNCTIONS

In [6]:
def get_triples_from_response(response: dict, *,
    sparql_columns_dict: dict = SPARQL_COLUMNS_DICT) -> pd.DataFrame:
    """
    Extracts triples from the response of a SPARQL query.
    """
    triples = []
    for row in response["results"]["bindings"]:
        triple = {}
        for key, value in row.items():
            triple[sparql_columns_dict[key]] = value["value"]
        triples.append(triple)
    return pd.DataFrame(triples)

In [7]:
def sample_triple_for_entity_as_list(triples_df: pd.DataFrame, entity: str, *, 
    sparql_columns_dict: dict = SPARQL_COLUMNS_DICT) -> List[str]:
    """
    Samples triples for a given entity.
    """
    if entity is None:
        return [None, None]
    matched_triples_df = triples_df[triples_df[sparql_columns_dict["subject"]] == entity]\
        [[sparql_columns_dict["predicate"], sparql_columns_dict["object"]]]
    if len(matched_triples_df) == 0:
        return [None, None]
    sampled_triple = matched_triples_df.sample(n=1, replace=False).iloc[0]
    return [sampled_triple[sparql_columns_dict["predicate"]], sampled_triple[sparql_columns_dict["object"]]]

In [8]:
def single_hop_multiple_entities(yago_db: YagoDB, entities_df: pd.DataFrame) -> pd.DataFrame:
    """
    Single-hop random walk on the YAGO knowledge graph.
    Takes a dataframe of entities and returns a dataframe of entities and their neighbors.
    """
    query2 = get_triples_multiple_subjects_query(
        entities=[f"<{entity}>" for entity in entities_df["entity0"].tolist()], 
        columns_dict=SPARQL_COLUMNS_DICT
    )
    response = query_kg(YAGO_ENDPOINT_URL, query2)
    triples = get_triples_from_response(response)
    entities_hop_1 = entities_df.apply(
        lambda row: sample_triple_for_entity_as_list(triples_df=triples, entity=row["entity0"]), 
        axis=1, result_type="expand").rename(columns={0: "predicate1", 1: "entity1"})
    return entities_hop_1

In [9]:
def random_walks_multiple(yago_db: YagoDB, *, num_of_entities: int = 10, depth: int = 3) -> pd.DataFrame:
    """
    Random walks on the YAGO knowledge graph.
    """
    query1 = get_random_entities_query(num_of_entities=num_of_entities)
    entities = yago_db.query(query1)
    entities_df = pd.DataFrame([f"{entity[1]}" for entity in entities], columns=["entity0"])

    for i in range(depth - 1):
        entities_single_hop = single_hop_multiple_entities(yago_db, entities_df)
        entities_df[[f"predicate{i+1}", f"entity{i+1}"]] = entities_single_hop

    return entities_df

### Experiment Single Walks

In [10]:
yago_db = YagoDB(YAGO_ENTITY_STORE_DB_PATH)

In [11]:
random_walks = random_walks_multiple(yago_db, num_of_entities=1000, depth=3)

In [12]:
random_walks

Unnamed: 0,entity0,predicate1,entity1,predicate2,entity2
0,http://yago-knowledge.org/resource/Lcs-Cl_J051...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q84515442,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Galaxy_cluster
1,http://yago-knowledge.org/resource/Internal_Re...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Science_pro...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q116115390
2,http://yago-knowledge.org/resource/Denise_Fabr...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://yago-knowledge.org/resource/Researcher,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q92654511
3,http://yago-knowledge.org/resource/Alloclusia_...,http://schema.org/parentTaxon,http://yago-knowledge.org/resource/Alloclusia_...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://schema.org/Taxon
4,http://yago-knowledge.org/resource/Micrallecto...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://schema.org/Taxon,http://schema.org/parentTaxon,http://yago-knowledge.org/resource/Micrallecto
...,...,...,...,...,...
995,http://yago-knowledge.org/resource/Thuto-Ke-Ma...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q97334283,http://schema.org/location,http://yago-knowledge.org/resource/South_Africa
996,http://yago-knowledge.org/resource/Portrait_Of...,http://schema.org/material,http://yago-knowledge.org/resource/Oil_paint_g...,http://schema.org/material,http://yago-knowledge.org/resource/Oil_paint_g...
997,http://yago-knowledge.org/resource/Ucac2_38244...,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q87341254,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q87341254
998,http://yago-knowledge.org/resource/2008_Six-re...,http://schema.org/location,http://yago-knowledge.org/resource/Bangkok,http://www.w3.org/2002/07/owl#sameAs,http://www.wikidata.org/entity/Q2084994
