In [8]:

## Load packages

import asyncio
import pandas as pd
import os
import openai
from openai import OpenAI
from dotenv import load_dotenv
from sqlalchemy import create_engine
from tqdm import tqdm
from string import Template
import json 
from neo4j import GraphDatabase
from timeit import default_timer as timer
import psycopg2
from pprint import pprint
import igraph as ig
import leidenalg as la
import re
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [9]:
from chatlse.embeddings import compute_text_embedding

In [7]:
## Pull variables from a local .env file (if present) into the environment
load_dotenv()

## OpenAI API key and the name of the embedding model (read from OLLAMA_EMBED_MODEL)
openai_api_key = os.environ.get('OPENAI_API_KEY')
embed_model = os.environ.get('OLLAMA_EMBED_MODEL')

## Neo4j connection settings, read in one pass; each is None when the variable is unset
neo4j_url, neo4j_user, neo4j_password = (
    os.environ.get(key)
    for key in ("NEO4J_URL", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

## Shared Neo4j driver used by the cells below
gds = GraphDatabase.driver(neo4j_url, auth=(neo4j_user, neo4j_password))

In [11]:
def ingestion_pipeline(cypher_statements, failed_log_path="files/failed_statements.txt"):
    """Execute Cypher statements one at a time, logging failures instead of aborting.

    Each statement is sent to the module-level Neo4j driver ``gds``. A statement
    that raises is appended, together with the exception text, to
    ``failed_log_path`` and the loop continues with the next statement.

    Args:
        cypher_statements: Iterable of Cypher query strings. It is materialised
            into a list first, so generators are accepted as well.
        failed_log_path: File that failed statements are appended to. Its parent
            directory is created on demand.

    Returns:
        None.
    """
    statements = list(cypher_statements)
    total = len(statements)
    failures = 0

    for position, stmt in enumerate(statements, start=1):
        print(f"Executing cypher statement {position} of {total}")
        try:
            gds.execute_query(stmt)
        except Exception as e:
            failures += 1
            # Make sure the log directory exists; otherwise open() would raise
            # inside this handler and abort the whole run on the first failure.
            log_dir = os.path.dirname(failed_log_path)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            # Append (never overwrite) and pin the encoding so statements with
            # non-ASCII text can always be written.
            with open(failed_log_path, "a", encoding="utf-8") as f:
                f.write(f"{stmt} - Exception: {e}\n")

    if failures:
        # Surface failures in the cell output so they are not only visible in the log file.
        print(f"{failures} of {total} statements failed; see {failed_log_path}")

In [46]:
# Cypher query: return the doc_id and name properties of every node in the graph
retrieve_data = '''
    MATCH (n)
    RETURN n.doc_id as doc_id, n.name as name'''

# Run the query; the first element of the result is the sequence of records,
# each of which is converted to a plain dict ({'doc_id': ..., 'name': ...}).
results = gds.execute_query(retrieve_data)
nodes = list(map(lambda rec: rec.data(), results[0]))

In [45]:
# Compute an embedding of each node's name and store it on that node.
#
# The node is matched on BOTH doc_id and name: several nodes share one doc_id,
# so matching on doc_id alone would overwrite the embedding of every node in the
# document on each iteration, leaving them all with the last name's vector.
#
# Values are passed as query parameters rather than interpolated into the Cypher
# text, so names or ids containing quotes cannot break or alter the query.
set_embedding_query = '''
    MATCH (n)
    WHERE n.doc_id = $doc_id AND n.name = $name
    SET n.embedding = $embedding
'''

for node in nodes:
    name = node['name']
    doc_id = node['doc_id']
    if not name:
        # Nodes without a name have nothing to embed.
        continue
    # NOTE(review): assumes compute_text_embedding returns a plain list of floats
    # (it was previously interpolated as a Cypher list literal) - confirm it is a
    # type the Neo4j driver can serialise as a parameter.
    embeddings = compute_text_embedding(name, embed_model)
    gds.execute_query(
        set_embedding_query,
        parameters_={"doc_id": doc_id, "name": name, "embedding": embeddings},
    )

[{'doc_id': '00019fdfa72ca963b5d236470f52e2e4', 'name': None},
 {'doc_id': '00019fdfa72ca963b5d236470f52e2e4', 'name': 'David Stainforth'},
 {'doc_id': '00019fdfa72ca963b5d236470f52e2e4',
  'name': 'Public LSE Event with David Stainforth'},
 {'doc_id': '001a5f6677681e52d4bf6d29614b2983',
  'name': 'Wellbeing and Mental Health Policy'},
 {'doc_id': '001a5f6677681e52d4bf6d29614b2983', 'name': 'Disability Policy'},
 {'doc_id': '001a5f6677681e52d4bf6d29614b2983',
  'name': 'Staff Counselling Services'},
 {'doc_id': '001a5f6677681e52d4bf6d29614b2983',
  'name': 'Employee Assistance Programme'},
 {'doc_id': '001a5f6677681e52d4bf6d29614b2983',
  'name': 'Staff Wellbeing Webpages'},
 {'doc_id': '001a68fb3453d90cb36daa76f0f3da19',
  'name': 'Student Services Centre'},
 {'doc_id': '001a68fb3453d90cb36daa76f0f3da19',
  'name': 'Temporary Counter Space'},
 {'doc_id': '0022b72249c01371d18c5ac97509b584',
  'name': 'Department of Social Policy'},
 {'doc_id': '0022b72249c01371d18c5ac97509b584',
  'nam