In [6]:
from langchain_community.graphs import Neo4jGraph
import warnings
import os
import requests
warnings.filterwarnings('ignore')
from dotenv import load_dotenv

In [7]:
# Load from environment
load_dotenv(override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')

In [8]:
kg = Neo4jGraph(
    url = NEO4J_URI, username = NEO4J_USERNAME, password = NEO4J_PASSWORD, database = NEO4J_DATABASE
)

## Make index of unknow department node

In [9]:
kg.query("""
CREATE VECTOR INDEX unk_name_embeddings IF NOT EXISTS
FOR (d:`Unknown Department`) ON (d.name)
OPTIONS {
  indexConfig: {
    `vector.dimensions`: 1024,
    `vector.similarity_function`: 'cosine'
  }
}
""")

[]

In [10]:
kg.query("""
  SHOW VECTOR INDEXES
  """
)

[{'id': 2,
  'name': 'unk_name_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Unknown Department'],
  'properties': ['name'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': None}]

In [11]:
def get_embedding(text):
    response = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "mxbai-embed-large", "prompt": text}
    )
    response.raise_for_status()
    return response.json()["embedding"]

In [12]:
records = kg.query("""
MATCH (unk:`Unknown Department`)
WHERE unk.name IS NOT NULL
RETURN elementId(unk) AS id, unk.name as name
""")


In [13]:
for record in records:
    unk_id = record['id']
    name = record['name']
    if name:
        try:
            embedding = get_embedding(name)
            kg.query(
                """
                MATCH (u) WHERE elementId(u) = $id
                SET u.name_embeddings = $embedding
                """,
                params = {'id': unk_id, 'embedding': embedding}
            )
            print(f"✅ Embedding Unknown Dept: {name}")
        except Exception as e:
            print(f"❌ Failed for {name}: {e}")

✅ Embedding Unknown Dept: Peter
✅ Embedding Unknown Dept: Mir
✅ Embedding Unknown Dept: Mashusa
✅ Embedding Unknown Dept: Tehir


In [14]:
result = kg.query("""
    MATCH(unk: `Unknown Department`)
    WHERE unk.name IS NOT NULL
    RETURN unk.name, unk.name_embeddings
    Limit 1""")

In [15]:
print(result[0]['unk.name'])

Peter


In [16]:
print(result[0]['unk.name_embeddings'][:10])

[-0.3372099995613098, -0.7694968581199646, -0.11872391402721405, -0.7180010676383972, -0.43985849618911743, -0.13399681448936462, 0.09659945964813232, 0.10092643648386002, 0.5681824088096619, -0.08018582314252853]


In [17]:
print(len(result[0]['unk.name_embeddings']))

1024


## Ask Question

In [18]:
question = "What name is peter?"

In [19]:
embedding = get_embedding(question)

In [21]:
results = kg.query("""
WITH $embedding AS query_embedding
MATCH (u:`Unknown Department`)
WHERE u.name_embeddings IS NOT NULL
WITH u, gds.similarity.cosine(u.name_embeddings, query_embedding) AS score
RETURN u.name AS name, score
ORDER BY score DESC
""", params={"embedding": embedding})


In [22]:
for result in results:
    print(f"🔍 Match {result['name']} | Similarity: {result['score']: .4f}")

🔍 Match Peter | Similarity:  0.8495
🔍 Match Mir | Similarity:  0.4153
🔍 Match Mashusa | Similarity:  0.4065
🔍 Match Tehir | Similarity:  0.3851
