In [None]:
%pip install generative-ai-hub-sdk panda hana-ml

<b>Loading vector data from a csv file</b><br/>
Download the following CSV file and save it on your system. Download File<br/>
Disclaimer: if you are uploading the sample CSV file for this tutorial, use
steps 1-5 below to upload it. Do not use the SAP HANA Database Explorer to upload
the file directly, as it may change the format in HANA.<br/><br/>
Execute the following python code in the same folder. This will load the data and store it in a data-frame.

In [1]:
import pandas as pd

# Read the sample CSV from the current working directory into a pandas frame.
# low_memory=False makes pandas parse the file in one pass instead of in chunks.
df = pd.read_csv(
    'GRAPH_DOCU_QRC.csv',
    low_memory=False,
)

# Preview the first three rows.
df.head(n=3)

Unnamed: 0,ID,L1,L2,L3,FILENAME,HEADER1,HEADER2,TEXT,VECTOR_STR
0,273,90,40,0,090-040-000-Appendix_C_-_GraphScript_Cheat_She...,Appendix C - GraphScript Cheat Sheet,Weighted Path Functions,<!--! subsection -->\n### WEIGHT \n```graphsc...,"[0.015699435,0.020284351,0.0003677337,-0.00413..."
1,52,60,20,30,060-020-030-Basic_Vertex_Operations.md,Basic Vertex Operations,DEGREE,Returns the number of incoming and outgoing ed...,"[0.018821003,0.012627394,-0.007940338,-0.00959..."
2,44,60,20,20,060-020-020-Basic_Graph_Operations.md,Basic Graph Operations,EDGES,Returns all edges in a graph. \n- EDGES(GRAPH...,"[-0.013607875,0.009249507,-0.03403819,-0.03394..."


<b>Connection to the HANA Vector store</b><br/>
Execute the following python code to create a connection to the HANA Vector storage.

In [2]:
from hana_ml import ConnectionContext

# Alternative kept for reference: connect with a stored user key instead of
# spelling out address and credentials.
# cc = ConnectionContext(userkey='VDB_BETA', encrypt=True)

# Replace the <...> placeholders with the values of your own HANA instance.
cc = ConnectionContext(
    address='<address>',
    port=443,
    user='<username>',
    password='<password>',
    encrypt=True,
)

# Sanity check: report the server version and the schema the session is using.
print(cc.hana_version())
print(cc.get_current_schema())

4.00.000.00.1708429435 (fa/CE2024.2)
DEVELOPER


<b>Creating a table</b><br/>
To create a table, execute the following python command.<br/>

Note: if the number of columns in the CSV does not match the number of table columns, name the last unnamed column "REAL_VEC_STR" and add one extra column named "REAL_VEC_STR" to the table.

In [3]:
# Create the table that will receive the CSV rows.
# To re-run this cell once the table exists, drop it first:
#   DROP TABLE GRAPH_DOCU_QR
#
# NOTE(review): an earlier comment said to add an extra "REAL_VEC_STR" column when the
# CSV has one more (unnamed) column than the table. The statement below does not
# define such a column - confirm against the CSV you are loading.
sql_command = '''CREATE TABLE GRAPH_DOCU_QR(ID BIGINT, L1 NVARCHAR(3), L2 NVARCHAR(3), L3 NVARCHAR(3), FILENAME NVARCHAR(100), HEADER1 NVARCHAR(5000), HEADER2 NVARCHAR(5000), TEXT NCLOB, VECTOR_STR NCLOB);'''

cursor = cc.connection.cursor()
try:
    cursor.execute(sql_command)
finally:
    # Always release the cursor, even when the statement fails
    # (for example because the table already exists).
    cursor.close()

<b>Uploading the data to the database</b><br/>
Execute the following code to upload the data to the database.

In [4]:
from hana_ml.dataframe import create_dataframe_from_pandas

# Push the rows of the pandas frame `df` into the GRAPH_DOCU_QR table through the
# open connection `cc`; append=True is passed so the rows go into the table
# created in the previous step. The returned HANA dataframe is kept in `v_hdf`.
v_hdf = create_dataframe_from_pandas(
    connection_context=cc,
    table_name="GRAPH_DOCU_QR",
    pandas_df=df,
    append=True,
    allow_bigint=True,
)

100%|██████████| 1/1 [00:04<00:00,  4.30s/it]


<b>Creating a VECTOR column</b><br/>
Add a new column VECTOR to the table to store the vectors. Execute the following python code.

In [5]:
# Add a REAL_VECTOR column of dimension 1536 named VECTOR to the table.
sql_command = '''ALTER TABLE GRAPH_DOCU_QR ADD (VECTOR REAL_VECTOR(1536));'''

cursor = cc.connection.cursor()
try:
    cursor.execute(sql_command)
finally:
    # Always release the cursor, even when the statement fails
    # (for example because the column already exists on a re-run).
    cursor.close()

<b>Creating vectors from strings</b><br/>
The vectors for the strings can be created using the TO_REAL_VECTOR() function. <br/>
Execute the following code to update the VECTOR column with the vectors.

In [6]:
# Fill the VECTOR column by converting the textual vectors stored in VECTOR_STR
# with TO_REAL_VECTOR().
sql_command = '''UPDATE GRAPH_DOCU_QR SET VECTOR = TO_REAL_VECTOR(VECTOR_STR);'''

cursor = cc.connection.cursor()
try:
    cursor.execute(sql_command)
finally:
    # Always release the cursor, even when the statement fails.
    cursor.close()

<b>Get Embeddings</b><br/>
Define the function get_embedding() to generate embeddings for our input texts. Execute the following python code.

In [None]:
!pip install python-dotenv

In [1]:
import os

# Connection settings for SAP AI Core, keyed by the environment variable name.
# Replace each <...> placeholder with the value for your own service instance.
env_vars = {
    'AICORE_AUTH_URL' : '<aicore-authurl>/oauth/token',
    'AICORE_CLIENT_ID' : '<aicore-Client-ID>',
    'AICORE_CLIENT_SECRET' : '<aicore-client-secret>',
    'AICORE_BASE_URL' : '<aicore-baseURL>/v2',
    'AICORE_RESOURCE_GROUP' : '<resource-group>'
}

# Export every entry to the environment of the current process in one call.
os.environ.update(env_vars)

In [8]:
print(os.environ['AICORE_AUTH_URL'])  # Sanity check: shows the AICORE_AUTH_URL value currently set in the environment

https://reply-s5ghczh7.authentication.eu10.hana.ondemand.com/oauth/token


In [9]:
# Get embeddings
from gen_ai_hub.proxy.native.openai import embeddings

def get_embedding(input, model="text-embedding-ada-002") -> list:
    """Return the embedding vector for ``input``.

    Args:
        input: The text to embed; passed through unchanged to ``embeddings.create``.
        model: Name of the embedding model to use. It is forwarded as
            ``model_name`` (previously this argument was ignored and the
            default model was always used).

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    response = embeddings.create(
        model_name=model,
        input=input
    )
    # A single input was sent, so only the first result is read.
    return response.data[0].embedding

In [10]:
# Smoke test: request an embedding for a short string; the cell displays the returned vector.
get_embedding("hello", model="text-embedding-ada-002")

[-0.02504645101726055,
 -0.01940273866057396,
 -0.027782395482063293,
 -0.03103380836546421,
 -0.024649936705827713,
 0.027438750490546227,
 -0.012470357120037079,
 -0.00849861092865467,
 -0.01743338815867901,
 -0.008465568535029888,
 0.03254055976867676,
 0.004275739658623934,
 -0.024583851918578148,
 -0.0006298786029219627,
 0.01412910595536232,
 -0.0015034478856250644,
 0.03938703238964081,
 0.002009002957493067,
 0.026843979954719543,
 -0.012569485232234001,
 -0.02101522870361805,
 0.008881907910108566,
 0.008445742540061474,
 -0.0030630684923380613,
 -0.005362848285585642,
 -0.00950311217457056,
 0.01106934156268835,
 -0.0016967483097687364,
 0.003452973673120141,
 -0.023235704749822617,
 0.006730820517987013,
 -0.007903840392827988,
 -0.02392299473285675,
 -0.008901732973754406,
 0.00683986209332943,
 -0.01367972418665886,
 0.00950311217457056,
 -0.014115888625383377,
 0.02176860347390175,
 -0.010573700070381165,
 0.0034133223816752434,
 -0.014591705054044724,
 0.0052438941784203

<b>Running vector search</b><br/>
Define a function run_vector_search(). This function searches the vector database and finds the rows that are most similar to a given query.<br/>
Execute the following python code.

In [11]:
# Wrapping HANA vector search in a function
def run_vector_search(query: str, metric="COSINE_SIMILARITY", k=4):
    """Return the ``k`` rows of GRAPH_DOCU_QR that best match ``query``.

    The query text is embedded with ``get_embedding`` and compared against the
    table's VECTOR column using the chosen vector function.

    Args:
        query: Text to search for.
        metric: ``"COSINE_SIMILARITY"`` (best match = highest value) or
            ``"L2DISTANCE"`` (best match = lowest value); case-insensitive.
        k: Positive number of rows to return.

    Returns:
        The collected result of the query with the columns ID, HEADER1,
        HEADER2 and TEXT.

    Raises:
        ValueError: If ``metric`` is not one of the supported names or ``k``
            is not a positive integer. Both values are interpolated into the
            SQL text, so they are validated before any request is made.
    """
    # Sort direction that puts the best match first for each supported function.
    sort_orders = {"COSINE_SIMILARITY": "DESC", "L2DISTANCE": "ASC"}
    metric_name = str(metric).upper()
    if metric_name not in sort_orders:
        raise ValueError(
            "Unsupported metric {!r}; expected one of {}".format(metric, sorted(sort_orders))
        )

    try:
        top_k = int(k)
    except (TypeError, ValueError):
        raise ValueError("k must be a positive integer, got {!r}".format(k)) from None
    if top_k != k or top_k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    query_vector = get_embedding(query)
    # Coerce every component to float so only numeric text reaches the SQL literal.
    vector_literal = "[" + ",".join(str(float(component)) for component in query_vector) + "]"

    sql = '''SELECT TOP {k} "ID", "HEADER1", "HEADER2", "TEXT" FROM "GRAPH_DOCU_QR" ORDER BY "{metric}"("VECTOR", TO_REAL_VECTOR('{qv}')) {sort}'''.format(
        k=top_k, metric=metric_name, qv=vector_literal, sort=sort_orders[metric_name]
    )
    hdf = cc.sql(sql)
    df_context = hdf.head(top_k).collect()
    return df_context


In [12]:
# Try out the search helper with a sample question. The resulting frame, shown
# as the cell output, holds the rows that are most similar to the query.
query = "How can I run a shortest path algorithm?"
df_context = run_vector_search(
    query=query,
    metric="COSINE_SIMILARITY",
    k=4,
)
df_context

Unnamed: 0,ID,HEADER1,HEADER2,TEXT
0,211,Complex GraphScript Examples,GraphScript Procedure Example,The following example depicts a more complex e...
1,90,Graph Traversal Statements,Dijkstra's Algorithm (DIJKSTRA),DIJKSTRA searches for shortest paths in a weig...
2,83,Built-In Graph Algorithms,Shortest Path,```bnf\n<sssp_function> ::= SHORTEST_PATH '(' ...
3,65,Basic Weighted Path Operations,(Constructors),WEIGHTEDPATH objects can’t be constructed dire...


In [13]:
from gen_ai_hub.proxy.native.openai import chat

# basic LLM prompt for RAG
sys_content = '''Your task is to answer the question using the provided context wrapped in triple quotes.
If the provided context does not contain the information needed to answer this question then come up with your own answer. '''

def ask_llm(query: str, retrieval_augmented_generation: bool, metric='COSINE_SIMILARITY', k = 4, model_name='gpt-35-turbo'):
    """Send ``query`` to the chat model, optionally with retrieved context.

    Args:
        query: The user's question.
        retrieval_augmented_generation: When true, the TEXT values returned by
            ``run_vector_search(query, metric, k)`` are joined with spaces and
            supplied as context; otherwise the context is empty.
        metric: Vector function forwarded to ``run_vector_search``.
        k: Number of rows forwarded to ``run_vector_search``.
        model_name: Chat model to call; defaults to the model that was
            previously hard-coded.

    Returns:
        The response object returned by ``chat.completions.create`` (not a
        plain string); the answer text is read from
        ``response.choices[0].message.content``.
    """
    context = ''
    if retrieval_augmented_generation:
        df_context = run_vector_search(query, metric, k)
        context = ' '.join(df_context['TEXT'].astype('string'))

    # The system prompt refers to context "wrapped in triple quotes", so the
    # context is enclosed in triple double quotes ahead of the question.
    user_content = '"""' + context + '"""' + ' Question: ' + query
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]

    return chat.completions.create(model_name=model_name, messages=messages)


In [14]:
# Ask a question with retrieval enabled, then print only the text of the
# first choice in the response.
query = "I want to calculate a shortest path. How do I do that?"
response = ask_llm(
    query=query,
    retrieval_augmented_generation=True,
    k=4,
)
print(response.choices[0].message.content)

To calculate a shortest path, you can use the SHORTEST_PATH function in GraphScript. Here is an example of how to use the SHORTEST_PATH function:

```graphscript
GRAPH g = GRAPH("FLIGHTS", "GRAPH");
VERTEX v_source(:g, 'FRA');
VERTEX v_target(:g, 'JFK');
WEIGHTEDPATH<BIGINT> p = SHORTEST_PATH(:g, :v_source, :v_target);
```

In this example, we first declare a graph variable, `g`, which represents the graph on which we want to calculate the shortest path. Then, we declare two vertex variables, `v_source` and `v_target`, which represent the source and target vertices of the shortest path. We pass these variables to the SHORTEST_PATH function along with the graph variable. The function returns a WEIGHTEDPATH object, `p`, which represents the shortest path between the source and target vertices.

You can access various properties of the WEIGHTEDPATH object, such as the vertices and edges in the path, the length of the path, and the weight of the path. For example:

```graphscript
SEQUENCE<