# Knowledge Graph Testing Notebook

In [22]:
import json
import os
from pprint import pprint
from urllib.parse import quote

import requests
from neo4j import GraphDatabase

print("Imports loaded")

Imports loaded


In [23]:
# Connection settings. Each one may be overridden by an environment variable of
# the same name, so credentials do not have to be saved inside the notebook.
# The fallbacks are the localhost values this notebook was originally run with.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")
API_BASE = os.environ.get("API_BASE", "http://localhost:8000")

# Fixture values used by the paper and author checks further down.
TEST_ARXIV_ID = "1110.3385v1"
TEST_AUTHOR = "Pretesh Patel"

In [24]:
def get_driver(uri=NEO4J_URI, user=NEO4J_USER, password=NEO4J_PASSWORD):
    """Build a Neo4j driver for the given endpoint and basic-auth credentials.

    Args:
        uri: Connection URI; defaults to ``NEO4J_URI``.
        user: Username; defaults to ``NEO4J_USER``.
        password: Password; defaults to ``NEO4J_PASSWORD``.

    Returns:
        The object returned by ``GraphDatabase.driver``. The caller is
        responsible for closing it (e.g. by using it in a ``with`` block).
    """
    credentials = (user, password)
    neo4j_driver = GraphDatabase.driver(uri, auth=credentials)
    return neo4j_driver


def run_cypher(query: str, params: dict | None = None):
    """Run a Cypher query and return every result row as a plain dict.

    A fresh driver and session are obtained for each call (via
    ``get_driver``) and released when the ``with`` blocks exit.

    Args:
        query: Cypher text, optionally containing ``$name`` placeholders.
        params: Mapping of placeholder names to values; ``None`` means no
            parameters.

    Returns:
        A list with one ``record.data()`` dict per returned record.
    """
    with get_driver() as driver:
        with driver.session() as session:
            # Hand the mapping over as the `parameters` argument instead of
            # splatting it into keyword arguments: a Cypher parameter called
            # e.g. "query" or "parameters" would otherwise clash with
            # session.run()'s own argument names.
            result = session.run(query, params or {})
            return [record.data() for record in result]

print("Neo4j helpers ready")

Neo4j helpers ready


In [25]:
# Per-label node counts; a node carrying several labels is counted once per label.
NODE_COUNTS_QUERY = """
    MATCH (n)
    WITH labels(n) AS labels
    UNWIND labels AS label
    RETURN label, count(*) AS cnt
    ORDER BY cnt DESC
    """

# Per-type relationship counts.
REL_COUNTS_QUERY = """
    MATCH ()-[r]->()
    RETURN type(r) AS type, count(*) AS cnt
    ORDER BY cnt DESC
    """

stats_nodes = run_cypher(NODE_COUNTS_QUERY)
print("Node counts by label:")
for row in stats_nodes:
    print(row)

# The relationship query runs only after the node report has been printed.
stats_rels = run_cypher(REL_COUNTS_QUERY)
print("\nRelationship counts:")
for row in stats_rels:
    print(row)

Node counts by label:
{'label': 'Paper', 'cnt': 55}
{'label': 'Author', 'cnt': 26}
{'label': 'Institution', 'cnt': 13}
{'label': 'SubCategory', 'cnt': 6}
{'label': 'Year', 'cnt': 2}
{'label': 'MainCategory', 'cnt': 1}

Relationship counts:
{'type': 'CITES', 'cnt': 48}
{'type': 'AUTHORED_BY', 'cnt': 26}
{'type': 'AFFILIATED_WITH', 'cnt': 13}
{'type': 'BELONGS_TO_SUB', 'cnt': 12}
{'type': 'BELONGS_TO_MAIN', 'cnt': 7}
{'type': 'PUBLISHED_IN', 'cnt': 7}
{'type': 'CHILD_OF', 'cnt': 6}


In [26]:
print("Check paper exists and inspect its context")

# Look up one paper by arXiv id and gather its authors and categories.
# The OPTIONAL MATCH clauses keep the paper row even when a relationship is
# missing; DISTINCT removes the duplicates created by combining the matches.
PAPER_CONTEXT_QUERY = """
    MATCH (p:Paper {arxiv_id: $arxiv_id})
    OPTIONAL MATCH (p)-[:AUTHORED_BY]->(a:Author)
    OPTIONAL MATCH (p)-[:BELONGS_TO_SUB]->(sc:SubCategory)
    OPTIONAL MATCH (p)-[:BELONGS_TO_MAIN]->(mc:MainCategory)
    RETURN p.arxiv_id AS arxiv_id,
           p.title AS title,
           toString(p.published_date) AS published_date,
           collect(DISTINCT a.name) AS authors,
           collect(DISTINCT sc.code) AS subcats,
           collect(DISTINCT mc.code) AS maincats
    """

rows = run_cypher(PAPER_CONTEXT_QUERY, {"arxiv_id": TEST_ARXIV_ID})

if rows:
    pprint(rows[0])
else:
    print("Paper not found in Neo4j")

Check paper exists and inspect its context
{'arxiv_id': '1110.3385v1',
 'authors': ['Pretesh Patel', 'Tshilidzi Marwala'],
 'maincats': ['cs'],
 'published_date': '2011-10-15T05:39:34Z',
 'subcats': ['cs.AI'],
 'title': 'Fuzzy Inference Systems Optimization'}


In [27]:

def get_json(path: str, params: dict | None = None):
    """GET ``API_BASE`` + ``path`` and return the decoded JSON body.

    This is a best-effort helper for interactive use: any failure (connection
    error, timeout, non-2xx status, invalid JSON, ...) is printed rather than
    raised.

    Args:
        path: Endpoint path relative to ``API_BASE``; a leading slash is
            optional.
        params: Query-string parameters; ``None`` means none.

    Returns:
        The parsed JSON payload, or ``None`` if the request failed.
    """
    # Join with exactly one slash so both "/graph/x" and "graph/x" work.
    url = f"{API_BASE.rstrip('/')}/{path.lstrip('/')}"
    # Stays None if requests.get() itself raises, so the handler can tell
    # whether there is a response worth reporting.
    response = None
    try:
        response = requests.get(url, params=params or {}, timeout=20)
        response.raise_for_status()
        return response.json()
    except Exception as e:  # deliberately broad: report and keep the notebook running
        print(f"Request failed: {e}")
        if response is not None:
            # Truncate the body so a large error page does not flood the output.
            print('Status:', response.status_code, 'Text:', response.text[:500])
        return None

print("API helpers ready")

API helpers ready


In [28]:

# Encode the author name as a single URL path segment. quote() leaves "/"
# unescaped by default, so safe="" is needed to stop a name containing a
# slash from being split across path segments.
author_q = quote(TEST_AUTHOR, safe="")
path = f"/graph/authors/{author_q}/collaborations"
res = get_json(path, params={"limit": 10})
print("API /collaborations result:")
pprint(res)


API /collaborations result:
[{'collaboration_count': 1,
  'collaborator': 'Tshilidzi Marwala',
  'shared_papers': ['1110.3385v1']}]
