In [None]:
from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings

from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader

from langchain_community.vectorstores.hanavector import HanaDB
from hdbcli import dbapi

import configparser

In [None]:

# Notebook configuration: identifiers used by the cells below.
# Deployment id passed to OpenAIEmbeddings (embedding model).
EMBEDDING_DEPLOYMENT_ID = "d7b8e46fc3d5c25f"
# Deployment id passed to ChatOpenAI (chat model that answers the question).
LLM_DEPLOYMENT_ID = "d01dff41125cfa27"
# Name of the HANA table handed to the HanaDB vector store.
EMBEDDING_TABLE = "EMBEDDINGS_SHAWKING"

In [None]:
# Read the HANA connection settings from the local `.user.ini` file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so check that list and fail with a clear message
# instead of a confusing NoSectionError on the first config.get() below.
config = configparser.ConfigParser()
if not config.read('.user.ini'):
    raise FileNotFoundError(
        "Could not read '.user.ini'; it must provide a [hana] section with "
        "the keys url, port, user and passwd."
    )

# Open the SAP HANA connection that the vector store cells use.
connection = dbapi.connect(
    address=config.get('hana', 'url'),
    # Parse the port as an integer so a non-numeric value fails here, before
    # any connection attempt is made.
    port=config.getint('hana', 'port'),
    user=config.get('hana', 'user'),
    password=config.get('hana', 'passwd'),
    autocommit=True,
    # NOTE(review): TLS certificate validation is switched off. Kept as-is to
    # preserve behavior; confirm this is acceptable for the target system.
    sslValidateCertificate=False
)

In [None]:

# Prepare the splitter first: 500-character chunks with no overlap between them
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)

# Read the custom text file and cut it into chunks for embedding
loader = TextLoader('./time_travel.txt')
documents = loader.load()
texts = text_splitter.split_documents(documents)

# Report how many chunks were produced
print(f"Number of document chunks: {len(texts)}")


In [None]:
# Embedding model, addressed through its deployment id
embeddings = OpenAIEmbeddings(deployment_id=EMBEDDING_DEPLOYMENT_ID)

# Vector store on top of the open HANA connection and the embeddings table
db = HanaDB(
    connection=connection,
    embedding=embeddings,
    table_name=EMBEDDING_TABLE,
)

# Start from a clean table: delete the already existing documents first ...
db.delete(filter={})

# ... then add the freshly loaded document chunks
db.add_documents(texts)

In [None]:
# Chat model that will answer the question, addressed through its deployment id
chat_llm = ChatOpenAI(deployment_id=LLM_DEPLOYMENT_ID)

# Retriever view of the vector store; search is configured with k=10
retriever = db.as_retriever(
    search_kwargs={"k": 10},
)

In [None]:
# Build the RetrievalQA chain from the chat model and the vector-store
# retriever, so questions are answered based on the custom documents.
qa = RetrievalQA.from_llm(llm=chat_llm, retriever=retriever)

# Question text (written in German). In the lines visible here it is only
# assigned to `query`; it is not yet passed to `qa`.
query = "Was ist das machine learning model hinter dem regression model template von dem Data Attribute Recommendation service?"