In [1]:
import os, time, sys
from vllm import LLM
sys.path.append('../elyra_pipeline/')
from postgres_utilities import connect_db, close_db, execute_sql_results, execute_sql_results_np, execute_sql, execute_sql_results_params_np

INFO 05-05 14:50:12 [__init__.py:239] Automatically detected platform cuda.


In [2]:
# Notebook configuration shared by the cells below.
number_data_rows = 1000   # row limit applied to the product query
model_name = 'gritlm-7b'  # name under which the model is registered and invoked in aidb
# Base URL of the embedding service, taken from the environment (None when the variable is unset).
embedding_endpoint = os.getenv('embedding_model_endpoint')

In [3]:
# Turn the base service URL into the full embeddings route.
# Fail fast when the environment variable was not set: otherwise the f-string would
# silently produce "None/v1/embeddings" and that bogus URL would be registered in the database.
if not embedding_endpoint:
    raise ValueError("Environment variable 'embedding_model_endpoint' is not set")
# Only append the route when it is missing, so re-running this cell is idempotent.
if not embedding_endpoint.endswith("/v1/embeddings"):
    embedding_endpoint = f"{embedding_endpoint.rstrip('/')}/v1/embeddings"

In [4]:
# SQL statements used by the setup cell below. They are built with f-strings from the
# notebook configuration values (model_name, embedding_endpoint, number_data_rows).

# Calls aidb.delete_model for `model_name`.
# NOTE(review): presumably issued before create_model so the notebook can be re-run — confirm
# how aidb.delete_model behaves when the model does not exist yet.
delete_model = f"select aidb.delete_model('{model_name}');"
# Calls aidb.create_model with: the model name, the string 'embeddings', a JSONB config
# (model, url, dimensions=4096) and a JSONB object carrying an empty api_key.
# Doubled braces ({{ / }}) are f-string escapes that emit literal JSON braces.
# NOTE(review): the meaning of the trailing `true` argument is not visible here — confirm
# against the aidb.create_model signature.
create_model = f"""
    select aidb.create_model(
    '{model_name}',
    'embeddings',
    '{{"model":"{model_name}", "url":"{embedding_endpoint}", 
    "dimensions":4096}}'::JSONB,
    '{{"api_key":""}}'::JSONB, true);
    """
# Selects the productdisplayname column from products, limited to `number_data_rows` rows.
# There is no ORDER BY, so which rows are returned is up to the database.
get_data = f"select productdisplayname from products limit {number_data_rows}"

In [5]:
# Register the embedding model in aidb (delete, then create) and fetch the product
# names that will be embedded; the rows end up in `results`.
#
# `conn` is initialised to None before the try block so that a failure inside
# connect_db() cannot raise NameError in the handlers below (fresh kernel) or make
# them act on a stale connection left over from a previous execution (re-run).
conn = None
try:
    conn = connect_db()
    execute_sql(conn, delete_model)
    execute_sql(conn, create_model)
    results = execute_sql_results_np(conn, get_data)
except Exception as e:
    # Roll back only when a connection was actually opened and is still usable.
    if conn is not None and not conn.closed:
        conn.rollback()
    err_msg = f"An error occurred and the transaction was rolled back: {e}"
    # Chain explicitly so the original exception and traceback stay attached.
    raise Exception(err_msg) from e
finally:
    # Nothing to close when connect_db() itself failed.
    if conn is not None:
        close_db(conn)

In [6]:
# Parameterized query (one %s placeholder) that calls aidb.encode_text_batch for `model_name`,
# casts the result to text, removes every double-quote character with translate(), and casts
# the cleaned text to float[].
# NOTE(review): the quote stripping presumably works around how the batch result is serialised
# as text — confirm against the aidb.encode_text_batch return type.
encode_text_sql = f"select translate(cast (aidb.encode_text_batch('{model_name}', %s) as text), '\"', '')::float[]"

In [7]:
# Batch-embedding smoke test: send the product names fetched earlier (`results`) to
# aidb.encode_text_batch in batches, time the round trips, and validate the output.
expected_dimensions = 4096  # must match the "dimensions" value passed to aidb.create_model
batch_size = 1000           # number of descriptions sent per encode_text_batch call

# Initialise before the try block so the handlers below never see an undefined
# (fresh kernel) or stale (re-run) connection when connect_db() fails.
conn = None
try:
    conn = connect_db()
    test_description = "Starting batch embedding test using OpenShift AI vllm model with EDB AI"
    print(test_description)
    print(len(test_description) * "-") 
    tic = time.perf_counter()
    print("Embeddings computed ...", end=" ")
    # First column of every fetched row is the product display name.
    descriptions = [product[0] for product in results]
    # Batch over the rows actually returned, not the requested limit: the table may hold
    # fewer rows than number_data_rows, which previously produced empty batches.
    total_descriptions = len(descriptions)

    embeddings = list()
    for start_offset in range(0, total_descriptions, batch_size):
        end_offset = min(start_offset + batch_size, total_descriptions)
        print(start_offset, end_offset, end=" ")
        batch_result = execute_sql_results_params_np(conn, encode_text_sql, descriptions[start_offset:end_offset])
        # The query yields a single row with a single column holding the batch's embeddings;
        # extend in place instead of rebuilding the whole list on every batch.
        embeddings.extend(batch_result[0][0])
    # Stop the clock right after the last batch so connection teardown and the
    # validation below are not counted as embedding time.
    toc = time.perf_counter()
except Exception as e:
    if conn is not None and not conn.closed:
        conn.rollback()
    err_msg = f"An error occurred and the transaction was rolled back: {e}"
    # Chain explicitly so the original exception and traceback stay attached.
    raise Exception(err_msg) from e
finally:
    # Nothing to close when connect_db() itself failed.
    if conn is not None:
        close_db(conn)

# One embedding per description; comparing against len(descriptions) also works for an
# empty result set, where the old leaked loop variable `index` would be undefined.
assert len(embeddings) == len(descriptions)
for embedding in embeddings:
    assert len(embedding) == expected_dimensions

print("")
# Report the number of embeddings actually computed rather than the requested limit.
print(f"Time to compute {len(embeddings)} embeddings: {toc - tic:0.4f} seconds")
print(len(test_description) * "-") 

Starting batch embedding test using OpenShift AI vllm model with EDB AI
-----------------------------------------------------------------------
Embeddings computed ... 0 1000 
Time to compute 1000 embeddings: 11.2306 seconds
-----------------------------------------------------------------------
