In [6]:
import json

import psycopg2

from postgres_utilities import connect_db, close_db
from s3_utilities import get_s3_connection_profile, S3ConnectionProfile

In [7]:
# Notebook-level S3 connection profile; create_image_retriever reads it to build
# the pgfs server URL, region, skip_signature flag, credentials and image path.
#
# Alternative (currently disabled): obtain the profile from the s3_utilities helper
# instead of the literals below.
#s3_connection_profile = get_s3_connection_profile()
#
# NOTE(review): the access key and secret are hardcoded in this cell. Prefer loading
# them from the environment or a secrets store before sharing/committing the notebook.
# Positional arguments are presumably, in order: endpoint_url, bucket_name, region,
# skip_signature, access_key, secret_key, recommender_images_path -- TODO confirm
# against the S3ConnectionProfile definition in s3_utilities.
s3_connection_profile = S3ConnectionProfile("s3://minio-api-minio.apps.ai-dev01.kni.syseng.devcluster.openshift.com", 
                                 "edb-aidb", "us-east-1", "false", "minio", "minio1234!", "recommender_images")

In [8]:
def _sql_literal(value):
    """Render ``value`` as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def _create_server_sql(profile, secret_key):
    """Build the pgfs CREATE SERVER statement for ``profile`` with ``secret_key`` as the secret."""
    url = f"{profile.endpoint_url}/{profile.bucket_name}"
    # json.dumps keeps the credentials document valid even when a key contains
    # quotes or backslashes (hand-built JSON would break on those).
    credentials = json.dumps(
        {"access_key_id": profile.access_key, "secret_access_key": secret_key}
    )
    return (
        "CREATE SERVER images_s3_little FOREIGN DATA WRAPPER pgfs_fdw "
        f"OPTIONS (url {_sql_literal(url)}, region {_sql_literal(profile.region)}, "
        f"skip_signature {_sql_literal(profile.skip_signature)},  "
        f"credentials {_sql_literal(credentials)});"
    )


def create_image_retriever(conn, profile=None):
    """Create the pgfs server, aidb volume, model and retriever for the image bucket.

    Statements are executed on a cursor from ``conn`` in this order:
    drop server ``images_s3_little`` (cascade), ``aidb.delete_model('recom_images')``,
    CREATE SERVER, ``aidb.create_volume``, ``aidb.create_model`` and
    ``aidb.create_retriever_for_volume``. Each step is echoed with ``print``.
    Nothing is committed here; the caller owns the transaction.

    Args:
        conn: open DB-API connection whose ``cursor()`` works as a context manager.
        profile: object exposing ``endpoint_url``, ``bucket_name``, ``region``,
            ``skip_signature``, ``access_key``, ``secret_key`` and
            ``recommender_images_path``. Defaults to the notebook-level
            ``s3_connection_profile``.

    Raises:
        Whatever the database driver raises when a statement fails.
    """
    if profile is None:
        # Backward-compatible default: use the profile defined in the config cell.
        profile = s3_connection_profile

    with conn.cursor() as cur:
        # EDB aidb and pgfs extensions enable access to external data through volumes
        # and server objects. First delete these objects if they already exist from a
        # prior initialization.
        # NOTE(review): confirm aidb.delete_model tolerates a missing model on a
        # fresh database; otherwise this step aborts the transaction.
        fdw_drop_sql = "drop server if exists images_s3_little cascade;"
        drop_model_sql = "SELECT aidb.delete_model('recom_images');"
        cur.execute(fdw_drop_sql)
        cur.execute(drop_model_sql)

        print(f"Step 1a: drop sql:{fdw_drop_sql}")
        print(f"Step 1b: drop sql:{drop_model_sql}")

        # Using EDB's pgfs extension, connect to the in-cluster s3 bucket storage
        # location through a server object. Options and credentials are specific to
        # the type of storage location (s3 bucket or file system).
        cur.execute(_create_server_sql(profile, profile.secret_key))
        # Echo a redacted copy so the secret never lands in saved notebook output.
        print(f"Step 2: fdw_create={_create_server_sql(profile, '********')}")

        # Create a volume from the pgfs storage location for use as a data source
        # in retrievers.
        create_volume = (
            "SELECT aidb.create_volume('images_bucket_vol', 'images_s3_little', "
            f"{_sql_literal(profile.recommender_images_path)}, 'Image');"
        )
        cur.execute(create_volume)
        print(f"Step 3: create_volume={create_volume}")

        # Define the model used to generate embeddings.
        image_embedding_service = "https://phi-3-vision-128k-instruct-samouelian-edb-ai.apps.ai-dev01.kni.syseng.devcluster.openshift.com/v1/embeddings"

        # number embedding dimensions=3072 if we need it
        define_model = (
            "select aidb.create_model('recom_images','nim_clip',"
            f"'{{\"model\":\"phi-3-vision-128k-instruct\", \"url\":\"{image_embedding_service}\"}}'::JSONB);"
        )

        print(f"Step 4: define_model={define_model}")

        cur.execute(define_model)

        # Bind the model to the volume so the images can be searched.
        create_retriever = (
            "SELECT aidb.create_retriever_for_volume("
            "name => 'recom_images',"
            "model_name => 'recom_images',"
            "source_volume_name => 'images_bucket_vol');"
        )

        print(f"Step 5: create_retriever={create_retriever}")

        cur.execute(create_retriever)

In [9]:
def create_product_description_retriever(conn):
    """Register the text-embedding model and a table retriever over ``products``.

    Executes two statements on a cursor from ``conn``:
    ``aidb.create_model('product_descriptions_embeddings', ...)`` followed by
    ``aidb.create_retriever_for_table(name => 'recommend_products', ...)`` so text
    search can run over ``products.productdisplayname``. Nothing is committed here;
    the caller owns the transaction.

    Args:
        conn: open DB-API connection whose ``cursor()`` works as a context manager.

    Raises:
        Whatever the database driver raises when a statement fails, e.g. when the
        ``products`` table has not been created yet.
    """
    # Embedding endpoint used for the product description text.
    embedding_service_url = "https://phi-3-vision-128k-instruct-samouelian-edb-ai.apps.ai-dev01.kni.syseng.devcluster.openshift.com/v1/embeddings"

    model_config = (
        '{"model":"phi-3-vision-128k-instruct", '
        f'"url":"{embedding_service_url}",'
        '"dimensions":3072}'
    )
    define_model = (
        "select aidb.create_model("
        "'product_descriptions_embeddings',"
        "'embeddings',"
        f"'{model_config}'::JSONB, "
        "'{\"api_key\":\"\"}'::JSONB); "
    )

    product_retriever = """SELECT aidb.create_retriever_for_table(
                    name => 'recommend_products',
                    model_name => 'product_descriptions_embeddings',
                    source_table => 'products',
                    source_key_column => 'img_id',
                    source_data_column => 'productdisplayname',
                    source_data_type => 'Text'
                    );"""

    with conn.cursor() as cur:
        # NOTE(review): unlike create_image_retriever, no existing model/retriever is
        # dropped first, so re-running against an initialized database may fail --
        # confirm the desired re-run behavior before adding a delete_model call.
        cur.execute(define_model)
        cur.execute(product_retriever)

In [10]:
# Prerequisite: the `products` table must already exist, otherwise
# aidb.create_retriever_for_table fails with 'relation "products" does not exist'.
#
# Connect before entering the try block: if connect_db() itself fails there is no
# connection to roll back or close, and the original error surfaces unmasked.
conn = connect_db()
try:
    create_product_description_retriever(conn)
    create_image_retriever(conn)
    # Both retrievers are created in one transaction; commit only if every step succeeded.
    conn.commit()
except Exception:
    conn.rollback()
    # Bare raise keeps the original traceback intact.
    raise
finally:
    close_db(conn)

RaiseException: Failed to create retriever: relation "products" does not exist
CONTEXT:  PL/pgSQL function aidb.create_retriever_for_table(text,text,text,text,aidb.retrieversourcedataformat,text,text,text,text,integer,aidb.distanceoperator,jsonb) line 84 at RAISE
