In [1]:

## Load packages

import asyncio
import pandas as pd
import os
import openai
from openai import OpenAI
from dotenv import load_dotenv
from sqlalchemy import create_engine
from tqdm import tqdm
from string import Template
import json 
from neo4j import GraphDatabase
from timeit import default_timer as timer
import psycopg2
from pprint import pprint
import igraph as ig
import leidenalg as la
import re
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [2]:
from chatlse.embeddings import compute_text_embedding

In [3]:
## Settings pulled from the environment
# load_dotenv() runs before any lookup below (python-dotenv; presumably it
# loads a local .env file into the process environment — confirm for this repo).
load_dotenv()

# Every lookup yields None when the variable is unset.
openai_api_key = os.environ.get('OPENAI_API_KEY')
embed_model = os.environ.get('OLLAMA_EMBED_MODEL')  # passed to compute_text_embedding later on

## Neo4j connection settings
neo4j_url = os.environ.get("NEO4J_URI")
neo4j_user = os.environ.get("NEO4J_USERNAME")
neo4j_password = os.environ.get("NEO4J_PASSWORD")

# One driver object, reused by every query cell in this notebook.
gds = GraphDatabase.driver(
    neo4j_url,
    auth=(neo4j_user, neo4j_password),
)

In [5]:
# Cypher that returns the doc_id and name properties of every node
# (no label or property filter is applied).
retrieve_data = '''
    MATCH (n)
    RETURN n.doc_id as doc_id, n.name as name'''

# execute_query returns a (records, summary, keys) named tuple; only the
# records are needed here.
results = gds.execute_query(retrieve_data)

# One plain dict per record, with the keys "doc_id" and "name".
nodes = list(map(lambda record: record.data(), results.records))

In [7]:
for node in nodes:
    name = node['name']
    doc_id = node['doc_id']
    if name:
        embeddings = await compute_text_embedding(name, embed_model)
        gds.execute_query(f'''
            MATCH (n)
            WHERE (n.doc_id = "{doc_id}" AND n.embedding IS NULL)
            SET n.embedding = {embeddings}
        ''')
        print(f"Updated node {name, doc_id} with embedding")
    else:
        print(f"Node {doc_id} has no name")


Node 00019fdfa72ca963b5d236470f52e2e4 has no name
Updated node ('David Stainforth', '00019fdfa72ca963b5d236470f52e2e4') with embedding
Updated node ('Public LSE Event with David Stainforth', '00019fdfa72ca963b5d236470f52e2e4') with embedding
Updated node ('Wellbeing and Mental Health Policy', '001a5f6677681e52d4bf6d29614b2983') with embedding
