# Create embeddings of new Questions and store the vector in SAP HANA Cloud

### Get credentials for SAP HANA Cloud

In [1]:
import json
with open('./credentials.json', 'r') as creds:
  credentials = json.load(creds)

In [2]:
SAP_HANA_CLOUD_ADDRESS  = credentials["SAP_HANA_CLOUD"]["HANA_ADDRESS"]
SAP_HANA_CLOUD_PORT     = credentials["SAP_HANA_CLOUD"]["HANA_PORT"]
SAP_HANA_CLOUD_USER     = credentials["SAP_HANA_CLOUD"]["HANA_USER"]
SAP_HANA_CLOUD_PASSWORD = credentials["SAP_HANA_CLOUD"]["HANA_PASSWORD"]

### Get credentials for SAP AI Core

In [3]:
import os
os.environ["AICORE_CLIENT_ID"]      = credentials["SAP_AI_CORE"]["AICORE_CLIENT_ID"]
os.environ["AICORE_CLIENT_SECRET"]  = credentials["SAP_AI_CORE"]["AICORE_CLIENT_SECRET"]
os.environ["AICORE_AUTH_URL"]       = credentials["SAP_AI_CORE"]["AICORE_AUTH_URL"]
os.environ["AICORE_RESOURCE_GROUP"] = credentials["SAP_AI_CORE"]["AICORE_RESOURCE_GROUP"]
os.environ["AICORE_BASE_URL"]       = credentials["SAP_AI_CORE"]["AICORE_BASE_URL"]   

### Logon to SAP HANA Cloud

In [4]:
import hana_ml.dataframe as dataframe
conn = dataframe.ConnectionContext(
                                   address  = SAP_HANA_CLOUD_ADDRESS,
                                   port     = SAP_HANA_CLOUD_PORT,
                                   user     = SAP_HANA_CLOUD_USER,
                                   password = SAP_HANA_CLOUD_PASSWORD, 
                                  )
conn.connection.isconnected()

True

### Go through all Questions, that have no embeddings saved

In [5]:
df_remote_toprocess = conn.sql('''SELECT "AID", "QID", "QUESTION" FROM FAQ_QUESTIONS WHERE QUESTION_VECTOR IS NULL ORDER BY "AID", "QID" ''')

Display the first questions that need to be encoded

In [6]:
df_remote_toprocess.head(5).collect()

Unnamed: 0,AID,QID,QUESTION
0,1000,1,When was SAP founded?
1,1001,1,"What does the acronym ""SAP"" stand for?"
2,1002,1,What is SAP’s vision and mission?
3,1003,1,What is the business outlook for the current f...
4,1004,1,Are you planning to grow organically or throug...


How many questions need to be encoded

In [7]:
df_remote_toprocess.count()

24

### Specify embeddings engine 

In [8]:
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(proxy_model_name='text-embedding-ada-002')

### Create embeddings for the above questions and store them in the table

In [9]:
dbapi_cursor = conn.connection.cursor()
rowids_toprocess = df_remote_toprocess.select("AID", "QID", "QUESTION").collect()
for index, row_toprocess in rowids_toprocess.iterrows(): 
    my_embedding = embedding.embed_documents(row_toprocess['QUESTION']) 
    my_embedding_str = str(my_embedding[0])
    my_aid = row_toprocess['AID']
    my_qid = row_toprocess['QID']
    print(str(my_aid) + '-' + str(my_qid) + ': ' + str(my_embedding_str[:100]))
    dbapi_cursor.execute(f"""UPDATE "FAQ_QUESTIONS" SET "QUESTION_VECTOR" = TO_REAL_VECTOR('{my_embedding_str}') 
                             WHERE "AID" = {my_aid} AND "QID" = {my_qid};""")  

1000-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1001-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1002-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1003-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1004-1: [0.015408079651989546, -0.01377206071993299, 0.024191622895430708, 0.0016184182164437407, -0.0322644
1005-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1006-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1007-1: [0.017259458564609514, -0.017521175116587108, 0.022659065838184236, -0.004883063162897415, -0.018444
1008-1: [0.006315371411562854, -0.006675501713100856, 0.0037126135158026888, -0.008178225948679044, -0.03250
1009-1: [0.00250789

In [10]:
df_remote = conn.table('FAQ_QUESTIONS').sort(['AID', 'QID'])
df_remote.head(5).collect()

Unnamed: 0,AID,QID,QUESTION,QUESTION_VECTOR
0,1000,1,When was SAP founded?,"[0, 6, 0, 0, 181, 99, 141, 60, 145, 136, 143, ..."
1,1001,1,"What does the acronym ""SAP"" stand for?","[0, 6, 0, 0, 181, 99, 141, 60, 145, 136, 143, ..."
2,1002,1,What is SAP’s vision and mission?,"[0, 6, 0, 0, 181, 99, 141, 60, 145, 136, 143, ..."
3,1003,1,What is the business outlook for the current f...,"[0, 6, 0, 0, 181, 99, 141, 60, 145, 136, 143, ..."
4,1004,1,Are you planning to grow organically or throug...,"[0, 6, 0, 0, 44, 114, 124, 60, 54, 164, 97, 18..."
