In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from neo4j import GraphDatabase
import json
from collections import defaultdict

embedding_model_name = "BAAI/bge-large-en-v1.5"
EMBEDDING_DIM = 1024

class LawGraphEmbedder:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)

    def generate_embedding(self, text):
        if not self.embed_model:
            print("Error: Embedding model not initialized.")
            return None
        if not text or not isinstance(text, str):
            return None
        try:
            return self.embed_model.get_text_embedding(text)
        except Exception as e:
            print(f"Error generating embedding for text '{text[:50]}...': {e}")
            return None


In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# 2. Rehydrate embedding model
print("🔄 Rehydrating embedding model…")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
Settings.embed_model = embed_model
print("✅ Embedding model ready.")


# create law db

In [43]:
import os
import json
from neo4j import GraphDatabase

from llama_index.core import StorageContext, load_index_from_storage, Settings

# 1. CONFIG
PERSIST_DIR       = "./data/law_index_full/law_index"                     # the folder from Kaggle
VECTOR_STORE_PATH = os.path.join(PERSIST_DIR, "default__vector_store.json")
NEO4J_URI         = "bolt://localhost:7687"
NEO4J_USER        = "neo4j"
NEO4J_PASS        = "yourStrongPassword123"


# 3. Load saved vector_store
print(f"🔄 Loading vector store from '{VECTOR_STORE_PATH}'…")
with open(VECTOR_STORE_PATH, "r") as f:
    vector_data = json.load(f)
embedding_dict = vector_data.get("embedding_dict", {})
print(f"✅ Loaded vector store: {len(embedding_dict)} embeddings available.")

# 4. Load the index
print(f"🔄 Loading LlamaIndex storage context from '{PERSIST_DIR}'…")
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
all_nodes = list(index.docstore.docs.values())
print(f"✅ LlamaIndex loaded: {len(all_nodes)} nodes found in index.")

# 5. Inject embeddings into each node, reporting missing in real time
missing = 0
print("🔄 Injecting embeddings into nodes…")
for idx, node in enumerate(all_nodes, start=1):
    if getattr(node, "embedding", None) is None:
        emb = embedding_dict.get(node.node_id)
        if emb:
            node.embedding = emb
        else:
            missing += 1
            print(f"  ❗ [{idx}/{len(all_nodes)}] Missing embedding for node_id: {node.node_id}")
print(f"ℹ️  Injection complete. {missing}/{len(all_nodes)} nodes are missing embeddings.\n")

# 6. Connect to Neo4j
print(f"🔄 Connecting to Neo4j at {NEO4J_URI}…")
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))
# Test the connection
try:
    with driver.session() as session:
        session.run("RETURN 1")
    print("✅ Connected to Neo4j successfully.\n")
except Exception as e:
    print(f"❌ Failed to connect to Neo4j: {e}")
    exit(1)

# 7. Upload nodes to Neo4j (only if none missing)
def upload_node(tx, node):
    tx.run("""
        MERGE (c:LawClause {id: $id})
        SET c.text = $text,
            c.embedding = $embedding
    """, id=node.node_id, text=node.get_text(), embedding=node.embedding)

if missing == 0:
    total = len(all_nodes)
    print(f"🚀 Uploading {total} nodes to Neo4j…")
    with driver.session() as session:
        for idx, node in enumerate(all_nodes, start=1):
            session.write_transaction(upload_node, node)
            print(f"  ✓ [{idx}/{total}] Uploaded node_id: {node.node_id}")
    print("✅ All law-chunks uploaded to Neo4j.")
else:
    print("❗ Upload aborted: some embeddings are still missing. Please check the logs above.")


🔄 Loading vector store from './data/law_index_full/law_index/default__vector_store.json'…
✅ Loaded vector store: 18306 embeddings available.
🔄 Loading LlamaIndex storage context from './data/law_index_full/law_index'…
✅ LlamaIndex loaded: 18306 nodes found in index.
🔄 Injecting embeddings into nodes…
ℹ️  Injection complete. 0/18306 nodes are missing embeddings.

🔄 Connecting to Neo4j at bolt://localhost:7687…
✅ Connected to Neo4j successfully.

🚀 Uploading 18306 nodes to Neo4j…
  ✓ [1/18306] Uploaded node_id: 81205751-c0e7-4c5f-9066-3d9854300dd9
  ✓ [2/18306] Uploaded node_id: 99a6ead3-72e4-4244-998c-e629d30d1222


  session.write_transaction(upload_node, node)


  ✓ [3/18306] Uploaded node_id: bf2c76ef-e647-403d-8050-3b90fe281de6
  ✓ [4/18306] Uploaded node_id: 6592bf09-ad9f-4f85-ad83-d00e6c69e2fe
  ✓ [5/18306] Uploaded node_id: 51085e8b-a43d-4aa2-bce6-237d31ff8cc4
  ✓ [6/18306] Uploaded node_id: b60c479a-30a9-4fd5-a9b1-1127897cf1d9
  ✓ [7/18306] Uploaded node_id: 00a7e00e-2217-4c9f-8190-85cd39f5225d
  ✓ [8/18306] Uploaded node_id: 0586874c-727b-4194-85bc-5922e6992880
  ✓ [9/18306] Uploaded node_id: a1a08785-1efa-4181-96f0-343aee84235b
  ✓ [10/18306] Uploaded node_id: 85f1e001-9552-406a-9e79-31375ad24541
  ✓ [11/18306] Uploaded node_id: cab550ab-164e-4f7b-aad7-29692c1cfdf3
  ✓ [12/18306] Uploaded node_id: f2e3e40c-08f4-424d-8000-8af2f2c32743
  ✓ [13/18306] Uploaded node_id: d3ddf36a-6b80-4c3a-b8d8-0d91d8fbe11a
  ✓ [14/18306] Uploaded node_id: ba803dda-12cd-470a-8fb3-4cf627040fd1
  ✓ [15/18306] Uploaded node_id: a751a139-f2e5-42fb-b964-3e2ee61b49b6
  ✓ [16/18306] Uploaded node_id: a8c30fb4-eb7f-4b3e-b663-1d50de62a2af
  ✓ [17/18306] Uploaded nod

In [44]:
from neo4j import GraphDatabase

# === CONFIG ===
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"
VECTOR_INDEX_NAME = "lawClauseVectorIndex"
TOP_K = 3
SIMILARITY_THRESHOLD = 0.85

# === Connect to Neo4j ===
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))

# === Create SIMILAR_TO relationships ===
def link_similar_clauses(tx, top_k, threshold):
    # Get all law clause IDs
    result = tx.run("MATCH (n:LawClause) RETURN n.id AS id")
    ids = [r["id"] for r in result]

    print(f"🔄 Linking {len(ids)} law clauses…")
    for i, node_id in enumerate(ids, 1):
        # For each clause, find top-K similar clauses
        query = f"""
        MATCH (n:LawClause {{id: $node_id}})
        CALL db.index.vector.queryNodes($index_name, $top_k, n.embedding)
        YIELD node, score
        WHERE node.id <> n.id AND score > $threshold
        MERGE (n)-[:SIMILAR_TO {{score: score}}]->(node)
        """
        tx.run(query, node_id=node_id, index_name=VECTOR_INDEX_NAME, top_k=top_k, threshold=threshold)
        print(f"  ✓ [{i}/{len(ids)}] Linked node_id: {node_id}")

with driver.session() as session:
    session.write_transaction(link_similar_clauses, TOP_K, SIMILARITY_THRESHOLD)

print("✅ Relationship creation complete.")


  session.write_transaction(link_similar_clauses, TOP_K, SIMILARITY_THRESHOLD)


🔄 Linking 18461 law clauses…
  ✓ [1/18461] Linked node_id: b68c8fb4-15dd-4e3e-82d1-eacb01b4c0e7
  ✓ [2/18461] Linked node_id: 9c5ac03c-abca-429e-a167-e34e6f2adc3d
  ✓ [3/18461] Linked node_id: 69e14fad-bfc7-4370-a728-1226c92fb6a4
  ✓ [4/18461] Linked node_id: 341ed167-063a-4557-bd8e-fff6078f224b
  ✓ [5/18461] Linked node_id: a6d2d793-57ec-4c4e-9a1d-1d1797a19ce7
  ✓ [6/18461] Linked node_id: eb04390a-ac85-4f78-87da-0ff45ceb83d2
  ✓ [7/18461] Linked node_id: 0fb1f2dd-5433-4f32-a430-f4a6805a8edb
  ✓ [8/18461] Linked node_id: 02830dc6-e581-4fd7-84c8-8ad9d8c26d2b
  ✓ [9/18461] Linked node_id: ad737141-7847-462d-866f-5c6fd613deaf
  ✓ [10/18461] Linked node_id: fa33d39f-4b38-49a0-a52b-645c4a15b984
  ✓ [11/18461] Linked node_id: 6b7a5dbf-4c3c-4efe-bc4e-205c701a4e5e
  ✓ [12/18461] Linked node_id: 0dcd9a7a-ffc9-40f6-b6cb-0aebb780bb3b
  ✓ [13/18461] Linked node_id: 9b8e99e4-1f52-43a1-8727-b4a341e5bc7d
  ✓ [14/18461] Linked node_id: 51b026a5-35c8-45e7-96d3-ca7992aa3e50
  ✓ [15/18461] Linked node_i

In [3]:
from neo4j import GraphDatabase

# Setup (reuse your existing config)
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"
VECTOR_INDEX_NAME = "lawClauseVectorIndex"


def query_law_clauses(query_text, top_k=5):
    # Embed the query
    print(f"🔍 Embedding query: '{query_text}'")
    query_vector = embed_model.get_text_embedding(query_text)

    # Connect to Neo4j
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))

    results = []
    with driver.session() as session:
        cypher = f"""
        CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector)
        YIELD node, score
        RETURN node.id AS id, node.text AS text, score
        ORDER BY score DESC
        """
        records = session.run(cypher, index_name=VECTOR_INDEX_NAME, top_k=top_k, query_vector=query_vector)
        for record in records:
            results.append({
                "id": record["id"],
                "score": round(record["score"], 4),
                "text": record["text"]
            })

    return results


In [32]:
import textwrap


results = query_law_clauses("allocation of public funds to foreign affairs", top_k=3)
print(results)

🔍 Embedding query: 'allocation of public funds to foreign affairs'
[{'id': '9b8e99e4-1f52-43a1-8727-b4a341e5bc7d', 'score': 0.8129, 'text': '— Les autorisations d’engagement d’un montant de un milliard soixante-dix-sept millions quatre cent vingt mille dinars (1.077.420.000 DA) et les crédits de paiement d’un montant de quatre milliards trois cent quatre-vingt-quinze millions quatre cent vingt mille dinars (4.395.420.000 DA) ouverts, au titre du budget de l’Etat, par la loi de finances pour 2023, mis à la disposition du ministre de la numérisation et des statistiques, sont répartis conformément au tableau annexé au présent décret. Art. '}, {'id': '69e14fad-bfc7-4370-a728-1226c92fb6a4', 'score': 0.8041, 'text': "2. — Le ministre des finances et le ministre des affaires étrangères et de la communauté nationale à l’étranger sont chargés, chacun en ce qui le concerne, de l’exécution du présent décret qui sera publié au Journal officiel de  la République algérienne démocratique et populaire

In [29]:
import textwrap


# === Existing Query Function ===
from neo4j import GraphDatabase

NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"
VECTOR_INDEX_NAME = "lawClauseVectorIndex"

def query_law_clauses(query_text, top_k=2):
    print(f"🔍 Embedding query: '{query_text}'")
    query_vector = embed_model.get_text_embedding(query_text)

    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))
    results = []

    with driver.session() as session:
        cypher = f"""
        CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector)
        YIELD node, score
        RETURN node.id AS id, node.text AS text, score
        ORDER BY score DESC
        """
        records = session.run(
            cypher,
            index_name=VECTOR_INDEX_NAME,
            top_k=top_k,
            query_vector=query_vector
        )
        for record in records:
            results.append({
                "id": record["id"],
                "score": round(record["score"], 4),
                "text": record["text"]
            })

    return results

# === Updated Compliance Function ===
def ask_compliance_question_law(question, llm, top_k=5):
    # Step 1: Retrieve top-k law clauses using external query function
    context_nodes = query_law_clauses(question, top_k=top_k)


    if not context_nodes:
        return "❗ No relevant legal clauses were found to answer this question."

    # Step 2: Format retrieved clauses for prompt
    context_text = "\n\n".join(
        f"[{node['id']} - score {node['score']}]\n{textwrap.fill(node['text'], 100)}"
        for node in context_nodes
    )

    # Step 3: Build LLM prompt
    prompt = f"""You are a legal compliance assistant knowledgeable in algerian law.
The user asked: "{question}"

Based on the following law clauses, answer clearly and cite them if relevant:

--- START OF LAW CLAUSES ---
{context_text}
--- END OF LAW CLAUSES ---

🧠 Answer:"""

    # Step 4: Use Gemini to answer
    response = llm.complete(prompt)
    return response.text.strip()


In [20]:
import os

os.environ["GOOGLE_API_KEY"] = "AIzaSyCxOWqwcHGPh8R8QK6407oxWUMHgsoRhK0"

In [30]:
from llama_index.llms.gemini import Gemini

llm = Gemini(
    model="models/gemini-1.5-flash",
    temperature=0.1
)

question = "budget allowance"
answer = ask_compliance_question_law(question, llm)

print("\n🧠 AI Answer:\n", answer)


  llm = Gemini(


🔍 Embedding query: 'budget allowance'

🧠 AI Answer:
 The provided text gives budget allowances for several Algerian ministries in 2022 and 2023, but doesn't define "budget allowance" as a legal term.  The excerpts show allocations in Algerian Dinars (DA) for:

* **2022:**  A credit of 800,000 DA for the Ministry of Relations with Parliament (clause [0d6e32b7-0f0f-4a13-9f17-2ba2601bf01a]).  Another clause mentions the annulment of credits for the Ministry of Health (clause [4e8b9522-6a94-4fda-a576-354f76716223]), but the specific amounts are annulled, not allocated.

* **2023:**  Several clauses detail significant budget allocations for various ministries.  These include:
    * The Ministry of Digitization and Statistics (clause [9b8e99e4-1f52-43a1-8727-b4a341e5bc7d]) with authorizations of engagement (AE) of 1,077,420,000 DA and payment credits (CP) of 4,395,420,000 DA.  A detailed breakdown is provided in an annexed table.
    * The Ministry of Culture and Arts (clause [0dcd9a7a-ffc9-

# create standards db

In [33]:
import os
import json
from neo4j import GraphDatabase
from llama_index.core import StorageContext, load_index_from_storage, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# === CONFIG ===
PERSIST_DIR = "./data/standards_index"  # your saved AAOIFI index
VECTOR_STORE_PATH = os.path.join(PERSIST_DIR, "default__vector_store.json")
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"
EMBED_MODEL = "BAAI/bge-large-en-v1.5"

# === Load vector store embeddings ===
print(f"🔄 Loading vector store from '{VECTOR_STORE_PATH}'…")
with open(VECTOR_STORE_PATH, "r") as f:
    vector_data = json.load(f)
embedding_dict = vector_data.get("embedding_dict", {})
print(f"✅ Loaded vector store: {len(embedding_dict)} embeddings available.")

# === Load index and nodes ===
print(f"🔄 Loading LlamaIndex storage context from '{PERSIST_DIR}'…")
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
all_nodes = list(index.docstore.docs.values())
print(f"✅ Loaded {len(all_nodes)} standard nodes from index.")

# === Inject embeddings ===
missing = 0
print("🔄 Injecting embeddings into standard nodes…")
for idx, node in enumerate(all_nodes, start=1):
    if getattr(node, "embedding", None) is None:
        emb = embedding_dict.get(node.node_id)
        if emb:
            node.embedding = emb
        else:
            missing += 1
            print(f"  ❗ [{idx}/{len(all_nodes)}] Missing embedding for node_id: {node.node_id}")
print(f"ℹ️  Embedding injection complete. {missing}/{len(all_nodes)} missing.\n")

# === Connect to Neo4j ===
print(f"🔄 Connecting to Neo4j at {NEO4J_URI}…")
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))
try:
    with driver.session() as session:
        session.run("RETURN 1")
    print("✅ Connected to Neo4j.\n")
except Exception as e:
    print(f"❌ Failed to connect to Neo4j: {e}")
    exit(1)

# === Upload standards to Neo4j under a new label: StandardClause ===
def upload_standard(tx, node):
    tx.run("""
        MERGE (s:StandardClause {id: $id})
        SET s.text = $text,
            s.embedding = $embedding
    """, id=node.node_id, text=node.get_text(), embedding=node.embedding)

if missing == 0:
    print(f"🚀 Uploading {len(all_nodes)} standard nodes to Neo4j…")
    with driver.session() as session:
        for idx, node in enumerate(all_nodes, start=1):
            session.write_transaction(upload_standard, node)
            print(f"  ✓ [{idx}/{len(all_nodes)}] Uploaded standard node_id: {node.node_id}")
    print("✅ All standard chunks uploaded to Neo4j.")
else:
    print("❗ Upload aborted: Some embeddings are missing.")


🔄 Loading vector store from './data/standards_index/default__vector_store.json'…
✅ Loaded vector store: 413 embeddings available.
🔄 Loading LlamaIndex storage context from './data/standards_index'…
✅ Loaded 413 standard nodes from index.
🔄 Injecting embeddings into standard nodes…
ℹ️  Embedding injection complete. 0/413 missing.

🔄 Connecting to Neo4j at bolt://localhost:7687…
✅ Connected to Neo4j.

🚀 Uploading 413 standard nodes to Neo4j…
  ✓ [1/413] Uploaded standard node_id: cee44340-3893-4efd-b258-ba4628cf28ee
  ✓ [2/413] Uploaded standard node_id: af891530-1961-4653-928e-664c911efe31


  session.write_transaction(upload_standard, node)


  ✓ [3/413] Uploaded standard node_id: fa0c1ac9-ebbd-4966-9884-e11743af3787
  ✓ [4/413] Uploaded standard node_id: 1f6f8063-2616-454a-8d91-408f4c296b73
  ✓ [5/413] Uploaded standard node_id: fd294918-3616-48f6-be2d-2224901da87d
  ✓ [6/413] Uploaded standard node_id: ab3f1529-858d-467e-9c45-86679dc8aa27
  ✓ [7/413] Uploaded standard node_id: d08ce5f0-71f3-4cdd-81a1-21447ea11397
  ✓ [8/413] Uploaded standard node_id: ab6397d9-9706-470b-98c0-513fff2a6595
  ✓ [9/413] Uploaded standard node_id: 0b5aa2b2-f4b5-41f4-9d38-15f8d477b87d
  ✓ [10/413] Uploaded standard node_id: 06a4f461-14c8-4fdc-be7a-eabbb24d800c
  ✓ [11/413] Uploaded standard node_id: 5432af1d-d72b-4262-b532-cef0e6de1ced
  ✓ [12/413] Uploaded standard node_id: 3a4b842c-732c-4a83-a447-92b060f386f0
  ✓ [13/413] Uploaded standard node_id: 03988199-3473-40aa-8d7c-d5a4befdc444
  ✓ [14/413] Uploaded standard node_id: 5c1643a7-fdf2-401d-a8c4-57023ce2710c
  ✓ [15/413] Uploaded standard node_id: b7d38221-61cb-47f8-a071-74bf2d31a3c2
  ✓ [1

In [36]:
from neo4j import GraphDatabase

NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"

cypher = """
CALL db.index.vector.createNodeIndex(
  $index_name,
  $label,
  $property,
  $dimensions,
  $similarity
)
"""

params = {
    "index_name": "standardClauseVectorIndex",
    "label": "StandardClause",
    "property": "embedding",
    "dimensions": 1024,
    "similarity": "cosine"
}

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))
with driver.session() as session:
    session.run(cypher, **params)
print("✅ Created 'standardClauseVectorIndex' in Neo4j.")




✅ Created 'standardClauseVectorIndex' in Neo4j.


In [37]:
from neo4j import GraphDatabase
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# === CONFIG ===
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"
VECTOR_INDEX_NAME = "standardClauseVectorIndex"  # <-- changed

# === Initialize embedding model ===
EMBED_MODEL_NAME = "BAAI/bge-large-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

In [38]:


def query_standard_clauses(query_text, top_k=5):
    print(f"🔍 Embedding query for standards: '{query_text}'")
    query_vector = embed_model.get_text_embedding(query_text)

    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))
    results = []

    with driver.session() as session:
        cypher = f"""
        CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector)
        YIELD node, score
        RETURN node.id AS id, node.text AS text, score
        ORDER BY score DESC
        """
        records = session.run(
            cypher,
            index_name=VECTOR_INDEX_NAME,
            top_k=top_k,
            query_vector=query_vector
        )

        for record in records:
            results.append({
                "id": record["id"],
                "score": round(record["score"], 4),
                "text": record["text"]
            })

    return results


In [40]:
results = query_standard_clauses("interest-based financing", top_k=3)
for r in results:
    print(f"📌 {r['id']} (score: {r['score']})\n{r['text']}\n")


🔍 Embedding query for standards: 'interest-based financing'
📌 79b46545-b65d-40fe-91c5-a6c651cb42f2 (score: 0.8024)
[5: 37], The Case of : Wadi’ah Should Be Proportionate to the Amount of Fund. 
(16)(16) Ali Al-Khafif,  Ali Al-Khafif, “Companies in Islamic Jurisprudence”“Companies in Islamic Jurisprudence”, op. 

📌 c2d2beb3-bc7f-42ff-9e03-6c0bcf57fc17 (score: 0.7974)
19 
 
BC12 Accordingly, the board held its view in line with the earlier standards, that since IFIs have multiple 
stakeholders (including shareholders, who change hand at times, and the investment account 
holders, who all contribute for the transaction and at time change hands) it would be more just, fair 
and equitable if the profits are deferred and amortized over the period of the whole transaction (i.e. 
till credit period). Additionally, the board also apprised that although it is not obligatory to give 
discount in case of early payment, but it is permissible (though not as a common practice) and at 
times customary

In [None]:
from neo4j import GraphDatabase

# === CONFIG for Standards ===
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASS = "yourStrongPassword123"
VECTOR_INDEX_NAME = "standardClauseVectorIndex"
TOP_K = 3
SIMILARITY_THRESHOLD = 0.85

# === Connect to Neo4j ===
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASS))

# === Create SIMILAR_TO relationships between StandardClause nodes ===
def link_similar_standard_clauses(tx, top_k, threshold):
    result = tx.run("MATCH (n:StandardClause) RETURN n.id AS id")
    ids = [r["id"] for r in result]

    print(f"🔄 Linking {len(ids)} standard clauses…")
    for i, node_id in enumerate(ids, 1):
        query = f"""
        MATCH (n:StandardClause {{id: $node_id}})
        CALL db.index.vector.queryNodes($index_name, $top_k, n.embedding)
        YIELD node, score
        WHERE node.id <> n.id AND score > $threshold
        MERGE (n)-[:SIMILAR_TO {{score: score}}]->(node)
        """
        tx.run(query, node_id=node_id, index_name=VECTOR_INDEX_NAME, top_k=top_k, threshold=threshold)
        print(f"  ✓ [{i}/{len(ids)}] Linked standard node_id: {node_id}")

with driver.session() as session:
    session.write_transaction(link_similar_standard_clauses, TOP_K, SIMILARITY_THRESHOLD)

print("✅ Similarity linking complete for standards.")


In [1]:
import tempfile
import subprocess
import os

def open_string_in_excel(data: str, app: str = "libreoffice"):
    """
    Saves the string `data` to a temporary CSV file and opens it in a spreadsheet program.

    Args:
        data (str): The CSV-formatted string to open.
        app (str): The spreadsheet app to use (default: 'libreoffice').
                   You can change this to 'excel', 'wps', or any CLI-launchable spreadsheet app.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode='w') as tmpfile:
        tmpfile.write(data)
        tmpfile_path = tmpfile.name

    try:
        subprocess.Popen([app, tmpfile_path])
        print(f"Opened in {app}: {tmpfile_path}")
    except Exception as e:
        print(f"Failed to open the file in {app}. Error: {e}")


In [3]:
csv_string = "Name,Age,City\nAlice,30,Paris\nBob,25,Berlin"
open_string_in_excel(csv_string)


Opened in libreoffice: /tmp/tmp1jly8t7r.csv
