In [1]:
import os
import json
from openai import OpenAI
import hashlib
import redis
EMBEDDING_MODEL = "text-embedding-ada-002" #"text-embedding-3-large"
LARGE_EMBEDDING_MODEL = "text-embedding-3-large"

In [2]:
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
redis_client = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)

In [3]:

def get_embedding(client, text, model):
    text_hash = hashlib.sha256(text.encode()).hexdigest()
    cache_key = f"{model}_{text_hash}"
    cached_response = redis_client.get(cache_key)

    if cached_response:
        print("found response in cache")
        return json.loads(cached_response)

    print("no response in cache, obtaining embedding from LLM")
    response = client.embeddings.create(
                    input=text,
                    model=model,
                )
    
    embedding = response.data[0].embedding
    redis_client.set(cache_key, json.dumps(embedding))
    return embedding

In [77]:
resp1 = get_embedding(client, "This is a text string to embed", EMBEDDING_MODEL)

no response in cache, obtaining embedding from LLM


In [78]:
resp2 = get_embedding(client, "This is a text string to embed", EMBEDDING_MODEL)

found response in cache


In [83]:
len(resp1)

1536

In [5]:
import neo4j
from neo4j import GraphDatabase, RoutingControl
import networkx as nx
import requests

NEO4J_URI = "neo4j://localhost:7687"

TEXT_1 = "This simulation uses ADCIRC to model storm surges in the gulf coast"
TEXT_2 = "This experiment involved the user of 3D printing to investigate wind power in Texas."
text1_embedding = get_embedding(client, TEXT_1, EMBEDDING_MODEL)
text2_embedding = get_embedding(client, TEXT_2, EMBEDDING_MODEL)

with GraphDatabase.driver(NEO4J_URI) as driver:
    query = """
    MERGE (e:Embedding {text: $text, embedding: $embedding})
    """
    driver.execute_query(query, text=TEXT_1, embedding=text1_embedding)
    driver.execute_query(query, text=TEXT_2, embedding=text2_embedding)

found response in cache
found response in cache


In [15]:
QUERY_TEXT = "rigid mass"
query_embedding = get_embedding(client, QUERY_TEXT, EMBEDDING_MODEL)
with GraphDatabase.driver(NEO4J_URI) as driver:
    query = """
    CALL db.index.vector.queryNodes('designsafeEmbeddings', 1, $embedding)
    YIELD node, score
    MATCH (node)<-[:HAS_EMBEDDING]-(p:Entity)
    RETURN p, node.text
    """
    res = driver.execute_query(query, embedding=query_embedding)
    print(res)

no response in cache, obtaining embedding from LLM
EagerResult(records=[<Record p=<Node element_id='4:4999daec-1aa7-4f85-a3b5-f5fdea73685e:75' labels=frozenset({'Entity', 'designsafe_project_hybrid_simulation_exp_substructure'}) properties={'meta_uuid': '4555910888240058856-242ac11e-0001-012', 'name': 'designsafe.project.hybrid_simulation.exp_substructure', 'description': 'MTS Controller for the shake table.', 'title': 'Rigid Mass on Seismic Isolators', 'uuid': 'NODE_cb06483b-7b27-4260-a752-01056f9d0371'}> node.text='Rigid Mass on Seismic Isolators'>], summary=<neo4j._work.summary.ResultSummary object at 0x108552240>, keys=['p', 'node.text'])
