# Add additional Questions and Answers

### Get credentials for SAP HANA Cloud

In [1]:
import json

# Load all service credentials from the local JSON file next to this notebook.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII characters
# (for example in passwords) on systems whose default is not UTF-8.
with open('./credentials.json', 'r', encoding='utf-8') as credentials_file:
    credentials = json.load(credentials_file)

In [2]:
import os

# Pull the four SAP HANA Cloud connection settings out of the loaded
# credentials, in the order address, port, user, password.
(
    SAP_HANA_CLOUD_ADDRESS,
    SAP_HANA_CLOUD_PORT,
    SAP_HANA_CLOUD_USER,
    SAP_HANA_CLOUD_PASSWORD,
) = (
    credentials["SAP_HANA_CLOUD"][key]
    for key in ("HANA_ADDRESS", "HANA_PORT", "HANA_USER", "HANA_PASSWORD")
)

### Get credentials for SAP AI Core

In [3]:
import os

# Publish the SAP AI Core settings as environment variables; each variable
# carries the same name as its key in the credentials file.
# NOTE(review): presumably these are read by the SAP AI Core client used in
# the follow-up notebooks — confirm.
os.environ.update({
    name: credentials["SAP_AI_CORE"][name]
    for name in (
        "AICORE_CLIENT_ID",
        "AICORE_CLIENT_SECRET",
        "AICORE_AUTH_URL",
        "AICORE_RESOURCE_GROUP",
        "AICORE_BASE_URL",
    )
})

### Log on to an SAP HANA Cloud instance that has the vector engine enabled

In [4]:
import hana_ml.dataframe as dataframe

# Open the connection to SAP HANA Cloud with the settings read above.
conn = dataframe.ConnectionContext(
    address=SAP_HANA_CLOUD_ADDRESS,
    port=SAP_HANA_CLOUD_PORT,
    user=SAP_HANA_CLOUD_USER,
    password=SAP_HANA_CLOUD_PASSWORD,
)

# Shown as the cell output, so a failed logon is visible straight away.
conn.connection.isconnected()

True

### Upload the questions, with empty columns for the embeddings

Load the questions from the Excel file into a pandas DataFrame

In [5]:
# Uncomment to install the Excel reader dependency if it is missing:
#!pip install openpyxl
import pandas as pd

# Questions as maintained locally in the Excel workbook.
df_q_local = pd.read_excel(io='FAQ_QUESTIONS.xlsx')

In [6]:
# Preview the first ten locally maintained questions.
df_q_local.head(n=10)

Unnamed: 0,AID,QID,QUESTION,QUESTION_VECTOR
0,1000,1,When was SAP founded?,
1,1001,1,"What does the acronym ""SAP"" stand for?",
2,1002,1,What is SAP’s vision and mission?,
3,1003,1,What is the business outlook for the current f...,
4,1004,1,Are you planning to grow organically or throug...,
5,1005,1,Where can I find information about the SAP Exe...,
6,1006,1,Why was SAP AG converted into an SE?,
7,1007,1,What is SAP's current headcount?,
8,1008,1,Do SAP employees participate in the company's ...,
9,1009,1,\nWhat is SAP’s purpose and overarching strate...,


Download existing questions from SAP HANA Cloud

In [7]:
# Fetch the questions already stored in SAP HANA Cloud. The vector column is
# dropped before collecting because only AID, QID and QUESTION are needed
# for the comparison with the local file.
df_q_fromhana = (
    conn.table('FAQ_QUESTIONS')
    .drop('QUESTION_VECTOR')
    .collect()
)
df_q_fromhana.head(n=10)

Unnamed: 0,AID,QID,QUESTION
0,1000,1,When was SAP founded?
1,1001,1,"What does the acronym ""SAP"" stand for?"
2,1002,1,What is SAP’s vision and mission?
3,1003,1,What is the business outlook for the current f...
4,1004,1,Are you planning to grow organically or throug...
5,1005,1,Where can I find information about the SAP Exe...
6,1006,1,Why was SAP AG converted into an SE?
7,1007,1,What is SAP's current headcount?
8,1008,1,Do SAP employees participate in the company's ...
9,1009,1,\nWhat is SAP’s purpose and overarching strate...


Compare local data with data from SAP HANA Cloud to identify which questions are new

In [8]:
# Left-join the local questions against those already in SAP HANA Cloud.
# indicator=True adds a "_merge" column recording where each row was found.
df_all = df_q_local.merge(
    df_q_fromhana,
    how='left',
    on=['AID', 'QUESTION', 'QID'],
    indicator=True,
)

# Rows flagged "left_only" exist only in the local file, i.e. they are new.
# The helper column is removed again so the frame matches the table layout.
df_new = df_all.loc[df_all['_merge'] == 'left_only'].drop(columns='_merge')
df_new

Unnamed: 0,AID,QID,QUESTION,QUESTION_VECTOR
24,1001,2,"A Applications and P Products, but what about ...",


Append new questions to existing SAP HANA Cloud table

In [9]:
import hana_ml.dataframe as dataframe

if df_new.empty:
    # Nothing new was found in the local file: skip the upload instead of
    # sending an empty frame, and just reference the existing table so that
    # df_remote is defined either way.
    df_remote = conn.table('FAQ_QUESTIONS')
else:
    # Append only the new questions. force=False leaves the existing table
    # (and the vectors already calculated for it) in place; append=True adds
    # the new rows to it.
    df_remote = dataframe.create_dataframe_from_pandas(
        connection_context=conn,
        pandas_df=df_new,
        table_name='FAQ_QUESTIONS',
        force=False,
        replace=False,
        append=True,
    )

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.40it/s]


In [10]:
# Row count of the questions table in SAP HANA Cloud (shown as the cell output).
conn.table('FAQ_QUESTIONS').count()

25

### Upload the answers

In [11]:
import pandas as pd

# Answers as maintained locally in the Excel workbook.
df_data = pd.read_excel(io='FAQ_ANSWERS.xlsx')

In [12]:
# Preview the first five answers.
df_data.head(n=5)

Unnamed: 0,AID,ANSWER
0,1000,"In 1972, five former IBM employees founded the..."
1,1001,"""SAP"" stands for Systems, Applications, and Pr..."
2,1002,"In the digital era, data is the “new currency”..."
3,1003,The outlook and mid-term ambition can be found...
4,1004,Organic growth remains the primary driver of o...


Because the answers don't need to be vectorised (a step that can take some time), the existing table is simply dropped and re-created with all rows from the Excel file.

In [13]:
import hana_ml.dataframe as dataframe

# Upload the complete answers frame. force=True replaces whatever is
# currently stored in FAQ_ANSWERS instead of appending to it.
df_remote = dataframe.create_dataframe_from_pandas(
    connection_context=conn,
    pandas_df=df_data,
    table_name='FAQ_ANSWERS',
    replace=False,
    force=True,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.84it/s]


In [14]:
# Row count of the answers table in SAP HANA Cloud (shown as the cell output).
conn.table('FAQ_ANSWERS').count()

24

### Calculate text vectors

<font color='red'>Now run the notebook "020 Create embeddings of Questions", which calculates and stores the missing vectors for the new questions.</font>