In [1]:
#!pip install "generative-ai-hub-sdk[all]"

# FAQ Assistant with all processing on SAP BTP
## Step 2: Simulate the end user and ask a question

### Get credentials for SAP HANA Cloud

In [2]:
import json
with open('./credentials.json', 'r') as creds:
  credentials = json.load(creds)

In [3]:
import os
SAP_HANA_CLOUD_ADDRESS  = credentials["SAP_HANA_CLOUD"]["HANA_ADDRESS"]
SAP_HANA_CLOUD_PORT     = credentials["SAP_HANA_CLOUD"]["HANA_PORT"]
SAP_HANA_CLOUD_USER     = credentials["SAP_HANA_CLOUD"]["HANA_USER"]
SAP_HANA_CLOUD_PASSWORD = credentials["SAP_HANA_CLOUD"]["HANA_PASSWORD"]

### Get credentials for SAP AI Core, to access the Generative AI Hub

In [4]:
import os
os.environ["AICORE_CLIENT_ID"]      = credentials["SAP_AI_CORE"]["AICORE_CLIENT_ID"]
os.environ["AICORE_CLIENT_SECRET"]  = credentials["SAP_AI_CORE"]["AICORE_CLIENT_SECRET"]
os.environ["AICORE_AUTH_URL"]       = credentials["SAP_AI_CORE"]["AICORE_AUTH_URL"]
os.environ["AICORE_RESOURCE_GROUP"] = credentials["SAP_AI_CORE"]["AICORE_RESOURCE_GROUP"]
os.environ["AICORE_BASE_URL"]       = credentials["SAP_AI_CORE"]["AICORE_BASE_URL"]   

### Logon to SAP HANA Cloud

In [5]:
import hana_ml.dataframe as dataframe
conn = dataframe.ConnectionContext(
                                   address  = SAP_HANA_CLOUD_ADDRESS,
                                   port     = SAP_HANA_CLOUD_PORT,
                                   user     = SAP_HANA_CLOUD_USER,
                                   password = SAP_HANA_CLOUD_PASSWORD, 
                                   )
conn.connection.isconnected()

True

### Get closest matches for new user question (the candidate questions)

Create the embeddings for a new question, using SAP HANA Cloud's embedding engine

In [6]:
user_question = 'What is the meaning of the letters SAP?'
sql = f'''SELECT VECTOR_EMBEDDING('{user_question}', 'QUERY', 'SAP_NEB.20240715') EMBEDDEDQUESTION FROM DUMMY;'''
user_question_embedding_str = conn.sql(sql).head(1).collect().iloc[0, 0]
user_question_embedding_str[:10]

[-0.008662866428494453,
 0.01020228024572134,
 0.005512293428182602,
 0.007197333034127951,
 0.02618454024195671,
 0.03807266056537628,
 0.04797031357884407,
 0.03472454473376274,
 -0.031678829342126846,
 -0.049704935401678085]

Find the most similar questions, by comparing the new question's vector with the existing question's vectors, using SAP HANA Cloud's vector engine

In [7]:
sql = f'''SELECT TOP 200 "AID", "QID", "QUESTION", COSINE_SIMILARITY("QUESTION_VECTOR", TO_REAL_VECTOR('{user_question_embedding_str}')) AS SIMILARITY
        FROM FAQ_ASSISTANT_QUESTIONS ORDER BY "SIMILARITY" DESC, "AID", "QID" '''
df_remote = conn.sql(sql)
df_remote.head(20).collect()

Unnamed: 0,AID,QID,QUESTION,SIMILARITY
0,1001,1,"What does the acronym ""SAP"" stand for?",0.831729
1,1016,1,What's it like to work at SAP?,0.617224
2,1017,1,What employee benefits does SAP offer?,0.6113
3,1002,1,What is SAP’s vision and mission?,0.610386
4,1019,1,What can I expect in terms of career progressi...,0.604362
5,1018,1,What career development opportunities does SAP...,0.602155
6,1006,1,Why was SAP AG converted into an SE?,0.575461
7,1022,1,Where can I receive SAP training to prepare fo...,0.569313
8,1000,1,When was SAP founded?,0.567949
9,1021,1,How does SAP approach work-life balance?,0.566226


Select at least the best n candidates, or more in case more have high similarity. <font color='red'>Test which values of n seems most suitable for your data</font>

In [8]:
top_n = max(df_remote.filter('SIMILARITY > 0.95').count(), 10)
top_n

10

Download the selected Questions as Pandas DataFrame for processing

In [9]:
df_data = df_remote.head(top_n).select('AID', 'QID', 'QUESTION').collect()
df_data

Unnamed: 0,AID,QID,QUESTION
0,1001,1,"What does the acronym ""SAP"" stand for?"
1,1016,1,What's it like to work at SAP?
2,1017,1,What employee benefits does SAP offer?
3,1002,1,What is SAP’s vision and mission?
4,1019,1,What can I expect in terms of career progressi...
5,1018,1,What career development opportunities does SAP...
6,1006,1,Why was SAP AG converted into an SE?
7,1022,1,Where can I receive SAP training to prepare fo...
8,1000,1,When was SAP founded?
9,1021,1,How does SAP approach work-life balance?


Create a composite ROW-ID out of the AID and QID, which will simplify the identification of the selected Question and its corresponding Answer

In [10]:
df_data['ROWID'] = df_data['AID'].astype(str) + '-' + df_data['QID'].astype(str) + ': '
df_data = df_data[['ROWID', 'QUESTION']]
df_data

Unnamed: 0,ROWID,QUESTION
0,1001-1:,"What does the acronym ""SAP"" stand for?"
1,1016-1:,What's it like to work at SAP?
2,1017-1:,What employee benefits does SAP offer?
3,1002-1:,What is SAP’s vision and mission?
4,1019-1:,What can I expect in terms of career progressi...
5,1018-1:,What career development opportunities does SAP...
6,1006-1:,Why was SAP AG converted into an SE?
7,1022-1:,Where can I receive SAP training to prepare fo...
8,1000-1:,When was SAP founded?
9,1021-1:,How does SAP approach work-life balance?


### Get the one question from the candidates that matches the user's question best

Turn the above Pandas dataframe with the candidate questions into block of text, for use with the Large Language Model

In [11]:
candiates_str = df_data.to_string(header=False,
                                  index=False,
                                  index_names=False)
print(candiates_str)

1001-1:                              What does the acronym "SAP" stand for?
1016-1:                                      What's it like to work at SAP?
1017-1:                              What employee benefits does SAP offer?
1002-1:                                   What is SAP’s vision and mission?
1019-1:            What can I expect in terms of career progression at SAP?
1018-1:               What career development opportunities does SAP offer?
1006-1:                                Why was SAP AG converted into an SE?
1022-1:  Where can I receive SAP training to prepare for a career with SAP?
1000-1:                                               When was SAP founded?
1021-1:                            How does SAP approach work-life balance?


### Get best match through Large Language Model

Prepare the prompt for the Large Language Model to select the best matching question from the candidates

In [12]:
llm_prompt = f'''
Task: which of the following candidate questions is closest to this one?
{user_question}
Only return the ID of the selected question, not the question itself. The ID are the first values in each row. 
In case none of the candidate questions is a good match, return only: NONE

-----------------------------------

Candidate questions. Each question starts with the ID, followed by a :, followed by the question
{candiates_str}
'''
print(llm_prompt)


Task: which of the following candidate questions is closest to this one?
What is the meaning of the letters SAP?
Only return the ID of the selected question, not the question itself. The ID are the first values in each row. 
In case none of the candidate questions is a good match, return only: NONE

-----------------------------------

Candidate questions. Each question starts with the ID, followed by a :, followed by the question
1001-1:                              What does the acronym "SAP" stand for?
1016-1:                                      What's it like to work at SAP?
1017-1:                              What employee benefits does SAP offer?
1002-1:                                   What is SAP’s vision and mission?
1019-1:            What can I expect in terms of career progression at SAP?
1018-1:               What career development opportunities does SAP offer?
1006-1:                                Why was SAP AG converted into an SE?
1022-1:  Where can I receive SAP

Specify which Large Language Model to use (must be deployed on the Generative AI Hub)

In [13]:
AI_CORE_MODEL_NAME  = 'mistralai--mistral-large-instruct'

Send the prompt to the Large Language Model via the SAP Generative AI Hub and display the full response

In [14]:
from gen_ai_hub.proxy.native.openai import chat
messages = [{"role": "user", "content": llm_prompt}
           ]
kwargs = dict(model_name=AI_CORE_MODEL_NAME, messages=messages, temperature=0)
response = chat.completions.create(**kwargs)
print(response)

ChatCompletion(id='chat-7b9941e0a1b9411c852e043f6a1761f2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=' 1001-1', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[]), stop_reason=None)], created=1740987695, model='mistralai--mistral-large-instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=310, total_tokens=318, completion_tokens_details=None, prompt_tokens_details=None))


Extract the ROWID that was selected by the Large Language Model

In [15]:
llm_response = response.choices[0].message.content.lstrip()
llm_response

'1001-1'

### Get the pre-defined Answer that belongs to the selected Question from the FAQ

In [16]:
aid = qid = None
matching_question = matching_answer = None

if llm_response != 'NONE':
   aid, qid = llm_response.split('-')

   # From HANA Cloud get the question from the FAQ that matches the user request best
   df_remote = conn.table('FAQ_ASSISTANT_QUESTIONS').filter(f''' "AID" = '{aid}' AND "QID" = '{qid}' ''').select('QUESTION')
   matching_question = df_remote.head(5).collect().iloc[0,0]
    
   # From HANA Cloud get the predefined answer of the above question from the FAQ
   df_remote = conn.table('FAQ_ASSISTANT_ANSWERS').filter(f''' "AID" = '{aid}' ''').select('ANSWER')
   matching_answer = df_remote.head(5).collect().iloc[0,0]
matching_answer

'"SAP" stands for Systems, Applications, and Products in Data Processing.'

### Tailor the answer for a natural conversation flow

Attempt to adjust the pre-defined response to a more natural answer, based on the user's specific phrase

In [17]:
llm_prompt = f'''
Task: Answer the following question. Only consider the context that is provided further below.
{user_question}

-----------------------------------

Context to consider to answer the above question:
{matching_answer}
'''
print(llm_prompt)


Task: Answer the following question. Only consider the context that is provided further below.
What is the meaning of the letters SAP?

-----------------------------------

Context to consider to answer the above question:
"SAP" stands for Systems, Applications, and Products in Data Processing.



In [18]:
messages = [{"role": "user", "content": llm_prompt}           ]
kwargs = dict(model_name=AI_CORE_MODEL_NAME, messages=messages, temperature=0)
response = chat.completions.create(**kwargs)
print(response)

ChatCompletion(id='chat-10be1d9cb0bd455a8fafad9fce495aa6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=' The letters SAP stand for Systems, Applications, and Products in Data Processing.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[]), stop_reason=None)], created=1740987696, model='mistralai--mistral-large-instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=19, prompt_tokens=69, total_tokens=88, completion_tokens_details=None, prompt_tokens_details=None))


In [19]:
natural_answer = response.choices[0].message.content.lstrip()
natural_answer

'The letters SAP stand for Systems, Applications, and Products in Data Processing.'

### Final answer

In [20]:
print(f"""
The user question was:
{user_question}

The selected question from the FAQ is:
{matching_question}

The pre-defined answer to the selected question is:
{matching_answer}

The natural answer is:
{natural_answer}""")


The user question was:
What is the meaning of the letters SAP?

The selected question from the FAQ is:
What does the acronym "SAP" stand for?

The pre-defined answer to the selected question is:
"SAP" stands for Systems, Applications, and Products in Data Processing.

The natural answer is:
The letters SAP stand for Systems, Applications, and Products in Data Processing.
