In [67]:
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table

from langchain.chains import RetrievalQA
from langchain.llms import AzureOpenAI
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from openai.embeddings_utils import cosine_similarity
from openai.embeddings_utils import get_embedding
from langchain.text_splitter import RecursiveCharacterTextSplitter
import openai

from IPython.display import display, HTML, JSON, Markdown

from dotenv import load_dotenv
import time
import tiktoken
import pandas as pd
from ast import literal_eval
import os
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Configure environment variables
load_dotenv()

AAD_TENANT_ID = os.getenv("AAD_TENANT_ID")
KUSTO_CLUSTER = os.getenv("KUSTO_CLUSTER")
KUSTO_DATABASE = os.getenv("KUSTO_DATABASE")
KUSTO_TABLE = os.getenv("KUSTO_TABLE")
KUSTO_MANAGED_IDENTITY_APP_ID = os.getenv("KUSTO_MANAGED_IDENTITY_APP_ID")
KUSTO_MANAGED_IDENTITY_SECRET = os.getenv("KUSTO_MANAGED_IDENTITY_SECRET")

# Configure OpenAI API
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.getenv("OPENAI_DEPLOYMENT_VERSION")

OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.getenv("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_MODEL_NAME = os.getenv("OPENAI_ADA_EMBEDDING_MODEL_NAME")

In [15]:
# Configure OpenAI API
openai.api_type = "azure"
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY

# init openAI for GPT3.5
llm = AzureOpenAI(model=OPENAI_MODEL_NAME,
             deployment_name=OPENAI_DEPLOYMENT_NAME,
             openai_api_version=OPENAI_DEPLOYMENT_VERSION,
             temperature=0,
             max_tokens=400,
             top_p=1
)

# init openAI for embeddings ada
embeddings = OpenAIEmbeddings(
    deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_ADA_EMBEDDING_MODEL_NAME,
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
    openai_api_type="azure",
)


#### IMPORTANT!! Embeddings Creation Section - Run this only once !!!
You only need to run this once to create the embeddings and save them to Azure Data Explorer.   
Then you can use the already created database and table in Azure Data explorer for retrieval

In [16]:
# you can add as many urls as you want, but for this example we will only use one
# "moby dick" the book is available online at the URL below
urls = ["https://www.gutenberg.org/files/2701/2701-0.txt"]

loader = UnstructuredURLLoader(urls=urls)
documents = loader.load()

#we use chunk size of 1000 and 10% overlap to try not to cut sentences in the middle
#this regex separates by placing the sentence period when cutting a chunk at the end of that chunk
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, separators=["\n\n", "\n", "(?<=\. )", " ", ""])
chunks = text_splitter.split_documents(documents)
len(chunks)

1819

In [33]:
#we use the tenacity library to create delays and retries when calling openAI to avoid hitting throttling limits
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def calc_embeddings(text):
    deployment = OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME
    # replace newlines, which can negatively affect performance.
    text = text.replace("\n", " ")
    return get_embedding(text, engine=deployment)

In [18]:
#save all the chunks into a pandas dataframe
df = pd.DataFrame(columns=['document_name', 'content', 'embedding'])
for ch in chunks:
    dict = {'document_name': ch.metadata['source'],'content': ch.page_content, 'embedding': ""}
    temp_df = pd.DataFrame(dict, index=[0])
    df = pd.concat([df, temp_df], ignore_index=True)
df.head()

Unnamed: 0,document_name,content,embedding
0,https://www.gutenberg.org/files/2701/2701-0.txt,The Project Gutenberg eBook of Moby-Dick; or T...,
1,https://www.gutenberg.org/files/2701/2701-0.txt,CONTENTS\n\nETYMOLOGY.\n\nEXTRACTS (Supplied b...,
2,https://www.gutenberg.org/files/2701/2701-0.txt,CHAPTER 33. The Specksnyder.\n\nCHAPTER 34. Th...,
3,https://www.gutenberg.org/files/2701/2701-0.txt,CHAPTER 58. Brit.\n\nCHAPTER 59. Squid.\n\nCHA...,
4,https://www.gutenberg.org/files/2701/2701-0.txt,CHAPTER 85. The Fountain.\n\nCHAPTER 86. The T...,


In [20]:
# calculate the embeddings using openAI
df["embedding"] = df.content.apply(lambda x: calc_embeddings(x))
df.to_csv('data/adx/adx_embeddings.csv', index=False)
print(df.head(10))


                                      document_name  \
0   https://www.gutenberg.org/files/2701/2701-0.txt   
1   https://www.gutenberg.org/files/2701/2701-0.txt   
2   https://www.gutenberg.org/files/2701/2701-0.txt   
3   https://www.gutenberg.org/files/2701/2701-0.txt   
4   https://www.gutenberg.org/files/2701/2701-0.txt   
..                                              ...   
95  https://www.gutenberg.org/files/2701/2701-0.txt   
96  https://www.gutenberg.org/files/2701/2701-0.txt   
97  https://www.gutenberg.org/files/2701/2701-0.txt   
98  https://www.gutenberg.org/files/2701/2701-0.txt   
99  https://www.gutenberg.org/files/2701/2701-0.txt   

                                              content  \
0   The Project Gutenberg eBook of Moby-Dick; or T...   
1   CONTENTS\n\nETYMOLOGY.\n\nEXTRACTS (Supplied b...   
2   CHAPTER 33. The Specksnyder.\n\nCHAPTER 34. Th...   
3   CHAPTER 58. Brit.\n\nCHAPTER 59. Squid.\n\nCHA...   
4   CHAPTER 85. The Fountain.\n\nCHAPTER 86. The T... 

In [21]:
#save to local file
df.to_csv('data/adx/adx_embeddings.csv', index=False)

### Ingest the embeddings into Azure Data Explorer


* We will ingest the file to ADX manually into a table called "books"

In [26]:
# Connect to adx using AAD app registration
cluster = KUSTO_CLUSTER
kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster, KUSTO_MANAGED_IDENTITY_APP_ID, KUSTO_MANAGED_IDENTITY_SECRET,  AAD_TENANT_ID)
client = KustoClient(kcsb)
kusto_db = KUSTO_DATABASE
table_name = "books"

In [28]:
#testing the connection to kusto works - sample query to get the top 10 results from wikipedia
query = table_name + " | take 10"

response = client.execute(kusto_db, query)
for row in response.primary_results[0]:
    print("Title :{}".format(row["content"]))

Title :The Project Gutenberg eBook of Moby-Dick; or The Whale, by Herman Melville

This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.

Title: Moby



Dick; or The Whale

Author: Herman Melville

Release Date: June, 2001 [eBook #2701] [Most recently updated: August 18, 2021]

Language: English

Character set encoding: UTF



8

Produced by: Daniel Lazarus, Jonesey, and David Widger

*** START OF THE PROJECT GUTENBERG EBOOK MOBY-DICK; OR THE WHALE ***

MOBY



DICK;

or, THE WHALE.

By Herman Melville

CONTENTS

ETYMOLOGY.

EXTRACTS (Supplied by a Sub-Sub-Librarian).

CHAPTER 1. Loomings.
Title :CONTENT

In [54]:
def get_answer_from_adx(question, nr_of_answers=1):
        searchedEmbedding = calc_embeddings(question)
        kusto_query = table_name + " | extend similarity = series_cosine_similarity_fl(dynamic("+str(searchedEmbedding)+"), embedding,1,1) | top " + str(nr_of_answers) + " by similarity desc "
        response = client.execute(kusto_db, kusto_query)

        for row in response.primary_results[0]:
                return row['content']

In [44]:
# this is the question we want to ask and its embeddings
question = calc_embeddings("Why does the coffin prepared for Queequeg become Ishmael's life buoy once the Pequod sinks?")
print('Embeddings: {}'.format(question))

Embeddings: [0.013887224718928337, -0.027855660766363144, 0.011254607699811459, -0.011153092607855797, -0.017000345513224602, 0.002456658286973834, -0.02023528330028057, 0.012134402990341187, 0.0021724167745560408, 0.0027578186709433794, 0.017961353063583374, 0.033486347645521164, 0.02099326066672802, 0.0012384805595502257, -0.0024414309300482273, 0.004300843924283981, 0.02877606265246868, 0.0084121935069561, 0.0006949534290470183, -0.01524075586348772, -0.005539324134588242, 0.015389643609523773, 0.013602983206510544, -0.007153409533202648, -0.018286200240254402, 0.015592673793435097, 0.008452799171209335, -0.01563327945768833, 0.00900774635374546, 0.011999049223959446, 0.00525169912725687, -0.007945225574076176, -0.029263334348797798, -0.00922431144863367, -0.03134777024388313, -0.00798583123832941, 0.004629075061529875, -0.01672963984310627, 0.013129247352480888, -0.0060333628207445145, -0.008303910493850708, -0.0027036776300519705, -0.018245594576001167, 0.003080974332988262, -0.03

In [55]:
# here we get our answer but in a long and non concise way
get_answer_from_adx("Why does the coffin prepared for Queequeg become Ishmael's life buoy once the Pequod sinks?",1)

'Leaning over in his hammock, Queequeg long regarded the coffin with an attentive eye. He then called for his harpoon, had the wooden stock drawn from it, and then had the iron part placed in the coffin along with one of the paddles of his boat. All by his own request, also, biscuits were then ranged round the sides within: a flask of fresh water was placed at the head, and a small bag of woody earth scraped up in the hold at the foot; and a piece of sail-cloth being rolled up for a pillow, Queequeg now entreated to be lifted into his final bed, that he might make trial of its comforts, if any it had. He lay without moving a few minutes, then told one to go to his bag and bring out his little god, Yojo. Then crossing his arms on his breast with Yojo between, he called for the coffin lid (hatch he called it) to be placed over him. The head part turned over with a leather hinge, and there lay Queequeg in his coffin with little but his composed countenance in view. “Rarmai” (it will do;'

In [78]:
# by using a prompt we can ask the LLM model and get answers in a concise manner
def ask_gpt(question, answer):
    prompt = """You are a helpful assistant that answers questions only from the text provided in the Answers section below.
                Answer in a clear and concise manner in 1 sentence, providing answers only from the answers below. If the answer is not in the answers, please answer with "I don't know".

                Question:
                """    
    prompt = prompt + question 
    answer_prompt = """Answers:
    
    """
    prompt = prompt + answer_prompt + answer
    response = llm(prompt)
    display(Markdown(response))
    return response

In [77]:
question = "Why does the coffin prepared for Queequeg become Ishmael's life buoy once the Pequod sinks?"
retrieved_answer_from_adx = get_answer_from_adx(question,1)
ask_gpt(question, retrieved_answer_from_adx)

 it is easy), he murmured at last, and signed to be replaced in his hammock.
    
    The coffin prepared for Queequeg becomes Ishmael's life buoy once the Pequod sinks because Queequeg had the wooden stock of his harpoon drawn from it and the iron part placed in the coffin along with one of the paddles of his boat, biscuits were then ranged round the sides within, a flask of fresh water was placed at the head, and a small bag of woody earth scraped up in the hold at the foot; and a piece of sail-cloth being rolled up for a pillow, Queequeg now entreated to be lifted into his final bed, that he might make trial of its comforts, if any it had.
    
    """
    Leaning over in his hammock, Queequeg long regarded the coffin with an attentive eye. He then called for his harpoon, had the wooden stock drawn from it, and then had the iron part placed in the coffin along with one of the paddles of his boat. All by his own request, also, biscuits were then ranged round the sides within: a flask of fresh water was placed at the head, and a small bag of woody earth scraped up in the hold at the foot; and a piece of sail-cloth being rolled up for a pillow, Queequeg now entreated to be lifted into his final bed, that he might make trial of its comforts, if any it had. The coffin prepared for Queequeg becomes Ishmael's life buoy once the Pequod sinks because Queequeg had the wooden stock of his harpoon drawn from it and the iron part placed in the coffin along with one of the paddles of his boat, biscuits were then ranged round the sides within, a flask of fresh water was placed at the head, and a small bag of woody earth scraped up in the hold at the foot

' it is easy), he murmured at last, and signed to be replaced in his hammock.\n    \n    The coffin prepared for Queequeg becomes Ishmael\'s life buoy once the Pequod sinks because Queequeg had the wooden stock of his harpoon drawn from it and the iron part placed in the coffin along with one of the paddles of his boat, biscuits were then ranged round the sides within, a flask of fresh water was placed at the head, and a small bag of woody earth scraped up in the hold at the foot; and a piece of sail-cloth being rolled up for a pillow, Queequeg now entreated to be lifted into his final bed, that he might make trial of its comforts, if any it had.\n    \n    """\n    Leaning over in his hammock, Queequeg long regarded the coffin with an attentive eye. He then called for his harpoon, had the wooden stock drawn from it, and then had the iron part placed in the coffin along with one of the paddles of his boat. All by his own request, also, biscuits were then ranged round the sides within: 

In [80]:
question = "Why does Ahab pursue Moby Dick?"
retrieved_answer_from_adx = get_answer_from_adx(question,1)
ask_gpt(question, retrieved_answer_from_adx)

 according with the wildness of his ruling passion, yet were by no means incapable of swaying him. 
    Ahab pursues Moby Dick because he is consumed with the hot fire of his purpose and in all his thoughts and actions ever had in view the ultimate capture of Moby Dick, and though he seemed ready to sacrifice all mortal interests to that one passion, it may have been that he was by nature and long habituation far too wedded to a fiery whaleman’s ways, altogether to abandon the collateral prosecution of the voyage, and there were still additional considerations which, though not so strictly according with the wildness of his ruling passion, yet were by no means incapable of swaying him.
    """
    return "Ahab pursues Moby Dick because he is consumed with the hot fire of his purpose and in all his thoughts and actions ever had in view the ultimate capture of Moby Dick, and though he seemed ready to sacrifice all mortal interests to that one passion, it may have been that he was by nature and long habituation far too wedded to a fiery whaleman’s ways, altogether to abandon the collateral prosecution of the voyage, and there were still additional considerations which, though not so strictly according with the wildness of his ruling passion, yet were by no means incapable of swaying him."<|im_sep|>

' according with the wildness of his ruling passion, yet were by no means incapable of swaying him. \n    Ahab pursues Moby Dick because he is consumed with the hot fire of his purpose and in all his thoughts and actions ever had in view the ultimate capture of Moby Dick, and though he seemed ready to sacrifice all mortal interests to that one passion, it may have been that he was by nature and long habituation far too wedded to a fiery whaleman’s ways, altogether to abandon the collateral prosecution of the voyage, and there were still additional considerations which, though not so strictly according with the wildness of his ruling passion, yet were by no means incapable of swaying him.\n    """\n    return "Ahab pursues Moby Dick because he is consumed with the hot fire of his purpose and in all his thoughts and actions ever had in view the ultimate capture of Moby Dick, and though he seemed ready to sacrifice all mortal interests to that one passion, it may have been that he was by 