In [11]:
import psycopg2, os
from postgres_utilities import connect_db, close_db
from s3_utilities import get_s3_connection_profile, S3ConnectionProfile

In [12]:
s3_connection_profile = get_s3_connection_profile()
#s3_connection_profile = S3ConnectionProfile("s3://minio-api-minio.apps.ai-dev01.kni.syseng.devcluster.openshift.com", 
#                                 "edb-aidb", "us-east-1", "false", "minio", "minio1234!", "recommender_images")

In [13]:
def create_image_retriever(conn):    
    
    with conn.cursor() as cur:
        #Drop existing objects if any
        cur.execute("DROP SERVER IF EXISTS images_s3_little CASCADE;")
        cur.execute("SELECT aidb.delete_model('recom_images');")
        cur.execute("SELECT aidb.delete_volume('images_bucket_vol');")
        cur.execute("SELECT aidb.delete_retriever('recom_images');")
        cur.execute("SELECT pgfs.delete_storage_location('images_s3_little');")
        cur.execute("DROP TABLE IF EXISTS recom_images_vector CASCADE;")


        #Using EDB's pgfs extension, let's now connect to the in-cluster s3 bucket storage location 
        create_storage_location = (
            "SELECT pgfs.create_storage_location('images_s3_little', 's3://edb-aidb',"
            "msl_id => null, "
            f"options => '{{\"endpoint\": \"{s3_connection_profile.endpoint_url}\"}}', "
            f"credentials => '{{\"access_key_id\": \"{s3_connection_profile.access_key}\", \"secret_access_key\":\"{s3_connection_profile.secret_key}\"}}'); "
            )
        cur.execute(create_storage_location)

        #Now create a volume from a PGFS storage location for use as a data source in retrievers.
        create_volume = f"SELECT aidb.create_volume('images_bucket_vol', 'images_s3_little', '{s3_connection_profile.recommender_images_path}/', 'Image');"
        cur.execute(create_volume)

        #Now let's define the model we'll use to generate embeddings
        #image_embedding_service = "https://phi-3-vision-128k-instruct-samouelian-edb-ai.apps.ai-dev01.kni.syseng.devcluster.openshift.com/v1/embeddings"

        #number embedding dimensions=3072 if we need it
        #define_model = (
        #    "select aidb.create_model('recom_images','nim_clip',"
        #    f"'{{\"model\":\"phi-3-vision-128k-instruct\", \"url\":\"{image_embedding_service}\"}}'::JSONB, "
        #    "'{\"api_key\":\"\"}'::JSONB); "            
        #)
        define_model = "SELECT aidb.create_model('recom_images', 'clip_local');"

        cur.execute(define_model)
       
        create_retriever = (
            "SELECT aidb.create_retriever_for_volume("
            "name => 'recom_images',"
            "model_name => 'recom_images',"
            "source_volume_name => 'images_bucket_vol');"
           )
        cur.execute(create_retriever)

In [14]:
def create_product_description_retriever(conn):

    with conn.cursor() as cur:
        # Run retriever for products table
        # The idea is to create a retriever for the products table so the text search can run over it.

        try:
            cur.execute("SAVEPOINT risky_operation")
            cur.execute("SELECT aidb.delete_retriever('recommend_products');")
        except Exception as e:
            cur.execute("ROLLBACK TO SAVEPOINT risky_operation") 

        cur.execute("DROP TABLE IF EXISTS recommend_products_vector CASCADE;")

        #Now let's define the model we'll use to generate embeddings
        embedding_endpoint =  os.environ.get('TEXT_EMBEDDING_ENDPOINT')
        model_name = os.environ.get('TEXT_EMBEDDING_MODEL_NAME')
        image_embedding_service = f"{embedding_endpoint}/v1/embeddings"

        drop_model_sql = "SELECT aidb.delete_model('product_descriptions_embeddings');"
        print(f"Drop model: {drop_model_sql}")
        cur.execute(drop_model_sql)

        define_model = (
            "select aidb.create_model("
            "'product_descriptions_embeddings',"
            "'embeddings',"
            f"'{{\"model\":\"{model_name}\", \"url\":\"{image_embedding_service}\"," 
            "\"dimensions\":4096}'::JSONB, "
            "'{\"api_key\":\"\"}'::JSONB, true); "
        )
        print(f"Create Model: {define_model}")

        cur.execute(define_model)

        product_retriever = """SELECT aidb.create_retriever_for_table(
                    name => 'recommend_products',
                    model_name => 'product_descriptions_embeddings',
                    source_table => 'products',
                    source_key_column => 'img_id',
                    source_data_column => 'productdisplayname',
                    source_data_type => 'Text'
                    );"""

        print(f"Product Retriever: {product_retriever}")

        cur.execute(product_retriever)

In [15]:
try:
    conn = connect_db()
    create_product_description_retriever(conn)
    create_image_retriever(conn)
    conn.commit()
except Exception as e:
    conn.rollback()
    raise(e)
finally:
    close_db(conn)

Drop model: SELECT aidb.delete_model('product_descriptions_embeddings');
Create Model: select aidb.create_model('product_descriptions_embeddings','embeddings','{"model":"None", "url":"None/v1/embeddings","dimensions":4096}'::JSONB, '{"api_key":""}'::JSONB, true); 
Product Retriever: SELECT aidb.create_retriever_for_table(
                    name => 'recommend_products',
                    model_name => 'product_descriptions_embeddings',
                    source_table => 'products',
                    source_key_column => 'img_id',
                    source_data_column => 'productdisplayname',
                    source_data_type => 'Text'
                    );
