### Graph Querying

In [None]:
import os

# GraphQueryAgent is a local module; the notebook changes into its source
# directory before importing it. GRAPH_RAG_DIR lets another user point at their
# own checkout; when unset, the original location is used.
GRAPH_RAG_DIR = os.environ.get(
    "GRAPH_RAG_DIR", "/work/submit/mcgreivy/beauty-in-stats/src/graph_rag"
)
os.chdir(GRAPH_RAG_DIR)

from GraphQueryAgent import GraphQueryAgent

uri = "neo4j+s://2d257b33.databases.neo4j.io"
# Credentials are read from the environment so they never appear in the notebook.
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")

# os.getenv returns None for unset variables; stop here with a clear message
# instead of handing empty credentials to the agent.
missing_vars = [
    name
    for name, value in (("NEO4J_USERNAME", username), ("NEO4J_PASSWORD", password))
    if not value
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_vars)
    )

agent = GraphQueryAgent(uri, username, password)

In [10]:
# Natural-language question handed to the agent. The literal is split across
# lines for readability; the pieces concatenate to a single string.
query = (
    "What are the dominant systematic uncertainties in measurements of "
    "the B⁰ mixing frequency (Δm_d)? "
    "What are typical magnitudes?"
)
# Later cells read the "cypher_query", "explanation", "raw_results" and
# "synthesized_answer" entries of this result.
answer = agent.query(query)

In [12]:
# Show only the unprocessed result set stored in the agent's answer.
raw_results = answer["raw_results"]
print("\nRaw Result:", raw_results)


Raw Result: {'columns': ['uncertainty_source', 'uncertainty_type', 'uncertainty_description', 'arxiv_id', 'observable_name', 'observable_arxiv_id', 'typical_magnitude', 'avg_importance'], 'results': [{'uncertainty_source': 'Signal model and fit procedure uncertainties', 'uncertainty_type': 'internal', 'uncertainty_description': 'Systematic uncertainties arising from the choice of signal models used in fits, including invariant mass and decay time resolution PDFs, variations between different signal models such as triple-Gaussian and double-Gaussian functions, alternative methods like sideband subtraction, allowing signal shape parameters to vary in decay time bins, and uncertainties related to signal modelling such as choice of mass distribution shape, decay time resolution function variations, and potential tails in the decay time resolution distribution. Also includes systematic uncertainties related to biases and corrections in fit procedures, including signal yield and yield ratio

In [11]:
# Report the question first, then each part of the agent's answer in a fixed order.
print("Query:", query)
for label, key in (
    ("\nCypher Query:", "cypher_query"),
    ("\nExplanation:", "explanation"),
    ("\nRaw Result:", "raw_results"),
    ("\nSynthesized Answer:\n", "synthesized_answer"),
):
    print(label, answer[key])

Query: What are the dominant systematic uncertainties in measurements of the B⁰ mixing frequency (Δm_d)? What are typical magnitudes?

Cypher Query: MATCH (o:observable) WHERE o.type CONTAINS "physical_constant" WITH o, vector.similarity.cosine(o.embedding, $("B0 mixing frequency, Δm_d, B0 oscillation frequency, neutral B meson mixing frequency")) AS similarity WHERE similarity > 0.1 ORDER BY similarity DESC LIMIT 5 MATCH (u:uncertainty_source)-[r:affects]-(o) WHERE u.type CONTAINS "internal" OR u.type CONTAINS "external" RETURN u.name AS uncertainty_source, u.type AS uncertainty_type, u.description AS uncertainty_description, u.arxiv_id AS arxiv_id, o.name AS observable_name, o.arxiv_id AS observable_arxiv_id, r.magnitude AS typical_magnitude, AVG(r.ranking) AS avg_importance ORDER BY avg_importance ASC LIMIT 20

Explanation: This Cypher query finds the dominant systematic uncertainties affecting measurements of the B⁰ mixing frequency (Δm_d) by first identifying observables semantica

In [6]:
# Hand-written Cypher containing a $("...") text placeholder. Judging by the
# recorded output, process_cypher_query returns the query with that placeholder
# replaced by a numeric vector.
cp_violation_cypher = """MATCH (o:observable) WHERE o.type CONTAINS 'physical_constant' WITH o ORDER BY vector.similarity.cosine(o.embedding, $("CP violation")) DESC LIMIT 20
MATCH (u:uncertainty_source)-[r:affects]-(o)
WITH u, o, r
WHERE u.type <> 'statistical'
WITH u.name AS uncertainty_name, u.description AS uncertainty_description, u.arxiv_id AS arxiv_id, o.name AS observable_name, o.arxiv_id AS observable_arxiv, r.ranking AS ranking, r.magnitude AS magnitude
WITH uncertainty_name, uncertainty_description, arxiv_id, COLLECT(DISTINCT [observable_name, observable_arxiv]) AS observables, AVG(ranking) AS avg_ranking, COLLECT(DISTINCT magnitude) AS magnitudes
RETURN uncertainty_name, uncertainty_description, arxiv_id, observables, avg_ranking, magnitudes
ORDER BY avg_ranking ASC
LIMIT 20
"""
processed_cypher = agent.process_cypher_query(cp_violation_cypher)
print(processed_cypher)

MATCH (o:observable) WHERE o.type CONTAINS 'physical_constant' WITH o ORDER BY vector.similarity.cosine(o.embedding, [-0.002668404, 0.014479412, -0.036080528, 0.022082519, -0.015610024, -0.023077099, -0.0221563, 0.002087227, -0.018545546, -0.002730848, 0.023635833, -0.03224369, 0.018095419, -0.0036364684, 0.021645885, -0.032337707, 0.031136269, -0.0114782285, -0.031741057, -0.02071223, 0.023705274, -0.044099282, -0.00082438457, 0.04496398, -0.03928942, 0.018980576, -0.0042926036, -0.016523652, -0.035786934, -0.013703386, -0.027106848, -0.049007747, 0.027676195, -0.0625465, 0.0032790252, -0.04882572, 0.0006639012, 0.0019009948, -0.052226122, 0.024750032, 0.01298595, -0.0620485, 0.00657875, -0.038730223, 0.032903392, -0.021903692, 0.004430915, -0.036377124, 0.0009072374, -0.014753038, 0.021476498, 0.043894492, 0.017962797, -0.051165525, 0.05738257, 0.022970889, -0.003658176, -0.01225404, -0.04197343, -0.04240705, -0.025960611, -0.038928475, 0.023840286, 0.022106187, -0.008647577, 0.10789