In [1]:
import os
import base64
import re
import csv
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import *
from azure.search.documents.models import VectorizedQuery
import openai
from openai import AzureOpenAI

In [9]:
# Load settings from the .env file into the process environment.
# (The original cell called load_dotenv() twice; once is sufficient.)
load_dotenv()

# --- Azure AI Search index settings ---
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
_search_index_key = os.getenv("AZURE_SEARCH_INDEX_KEY")

# Fail fast with a readable message. The original wrapped the endpoint in an
# f-string, which turned a missing variable into the literal string "None" and
# postponed the failure to the first request against the service.
_missing_settings = [
    name
    for name, value in (
        ("AZURE_SEARCH_SERVICE_ENDPOINT", service_endpoint),
        ("AZURE_SEARCH_INDEX_KEY", _search_index_key),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

index_creds = AzureKeyCredential(_search_index_key)
index_name = "images-keyword-search"

# Client for document-level operations on the index (uploading and querying).
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=index_creds)
# Client for index management (creating/updating the index definition).
index_client = SearchIndexClient(service_endpoint, index_creds)

# --- Azure OpenAI settings ---
# Read each value once and reuse it for both configuration styles below.
_openai_api_key = os.getenv("OPENAI_API_KEY")
_openai_endpoint = os.getenv("OPENAI_API_ENDPOINT")
_openai_api_version = os.getenv("OPENAI_API_VERSION")

# Module-level configuration, for code that calls the `openai` module directly.
openai.api_type = "azure"
openai.api_key = _openai_api_key
openai.azure_endpoint = _openai_endpoint
openai.api_version = _openai_api_version

# Explicit client object configured with the same values.
azure_openai_client = AzureOpenAI(
    api_key=_openai_api_key,
    api_version=_openai_api_version,
    azure_endpoint=_openai_endpoint,
)

In [13]:
def get_embedding(text, model="textembedding"):
    """Return the embedding of ``text`` from the Azure OpenAI embeddings endpoint.

    Args:
        text: Input string. Every newline is replaced by a single space before
            the request is made.
        model: Passed through as the ``model`` argument of the embeddings call.
            Per the original author's note this is the deployment name and
            should not be changed.

    Returns:
        The ``embedding`` attribute of the first item in the response's ``data``.
    """
    single_line = " ".join(text.split("\n"))
    response = azure_openai_client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

# Build one search document per CSV row.
# Columns are read by position: 0 -> Image_name, 1 -> Image_path, 4 -> Caption.
# NOTE(review): every non-blank row is treated as data; if the CSV has a header
# row it will be loaded as a document too — confirm against the file.
sections = []
# NOTE(review): errors='ignore' silently drops bytes that are not valid UTF-8.
# Kept as in the original so that loading does not start failing on such bytes.
with open('outputupdated.csv', 'rt', newline='', encoding='utf-8', errors='ignore') as csvfile:
    csvreader = csv.reader(csvfile)
    for item in csvreader:
        if not item:
            # csv.reader yields an empty list for a blank line; indexing into it
            # would raise IndexError, so such lines are skipped.
            continue
        sections.append({
            # Sequential string key, counted over the rows actually kept.
            "id": str(len(sections)),
            "Image_name": item[0],
            "Image_path": item[1],
            "Caption": item[4],
        })
# The message says "Indexing", but this cell only loads rows into memory;
# the text is left unchanged so the recorded notebook output stays valid.
print(f"Finished Indexing: {len(sections)} items in total")


Finished Indexing: 71 items in total


In [12]:
# Index schema: a string key field plus three text fields that all share the
# same analyzer and the same filter/sort/facet/search options.
text_field_options = dict(
    type="Edm.String",
    analyzer_name="standard.lucene",
    filterable=True,
    sortable=True,
    facetable=True,
    searchable=True,
)

index_fields = [SimpleField(name="id", type="Edm.String", key=True)]
index_fields.extend(
    SearchableField(name=field_name, **text_field_options)
    for field_name in ("Image_name", "Image_path", "Caption")
)

index = SearchIndex(name=index_name, fields=index_fields)

# Send the definition to the service; left as the last expression so the
# returned object is displayed as the cell output.
index_client.create_or_update_index(index)

<azure.search.documents.indexes.models._index.SearchIndex at 0x1a22d9f6ba0>

In [14]:
# Announce the upload before starting it; the original printed this message
# only after the upload call had already returned.
print("Uploading")
results = search_client.upload_documents(documents=sections)

# Each result reports whether its document was accepted.
succeeded = sum(1 for r in results if r.succeeded)
print(f"Indexed {len(results)} sections, {succeeded} succeeded")

# Identify the documents that failed instead of only counting the successes.
failed_results = [res for res in results if not res.succeeded]
for failed in failed_results:
    print(f"Failed to index document {failed.key}: {failed.error_message}")

# NOTE(review): `batch` is not read by any visible cell; kept in case other
# code relies on the name.
batch = []

Uploading
Indexed 71 sections, 71 succeeded


In [17]:
query = "Sir ho Kai?" #your query keywords

# Vector search for images (disabled).
# NOTE(review): the index schema defined in this notebook has no "Embedding"
# field, so this query would need a vector field added to the index first.
# query_vector = get_embedding(query)
# r = search_client.search(
#     search_text=None,
#     top=3,
#     vector_queries=[VectorizedQuery(
#         vector=query_vector,
#         fields="Embedding"
#     )]
# )

# Full text search: ask for the top 2 matches for the query text.
r = search_client.search(
    search_text=query,
    top=2
)

# Print each hit and collect it as a "Source: ...; Caption: ..." string for
# use by the answer-generation cell.
search_results = []
for result in r:
    image_name = result["Image_name"]
    caption = result["Caption"]
    print("#########################################")
    print(f"Source: {image_name}")
    # The original printed "Caption" with no separator, giving "CaptionDr. ...".
    print(f"Caption: {caption}")
    search_results.append(f"Source: {image_name}; Caption: {caption}")

#########################################
Source: Sir Ho Kai.jpg
CaptionDr. Ho Kai (later Sir Kai Ho Kai) barrister & medical doctor. Founder of the Alice Memorial Hospital, College of Medical and later, a most active promoter of University.
#########################################
Source: Sir Patrick Manson.jpg
CaptionDr. Patrick Manson (later Sir Patrick Manson) founder & first dean of the College of Medicine. He became known later as the “Father of Tropical Medicine”


In [13]:
# Generate an answer to `query` grounded in the retrieved `search_results`.
# NOTE(review): the original cell opened with the string
# 'Not this part on Generating Answer'; its intent is unclear (possibly
# "Note: this part is on generating the answer") — confirm with the author.

# System prompt: restricts the model to the supplied sources and asks for
# bracketed source citations.
systemMessage = """AI Assistant that helps user to answer questions from sources provided. Be brief in your answers.
                    Answer ONLY with the facts listed in the list of sources below. 
                    If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
                    Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
                    Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].
                """

# Put each retrieved source on its own line; the original joined them with a
# single space, which left the boundary between sources ambiguous.
sources_text = "\n".join(search_results)

messages = [
    {'role': 'system', 'content': systemMessage},
    {'role': 'user', 'content': f"{query}\n\nSources:\n{sources_text}"},
]

# Use the explicit Azure OpenAI client, as get_embedding does, rather than the
# module-level `openai` client; both are configured from the same settings.
chat_completion = azure_openai_client.chat.completions.create(
    model="summer",  # deployment name; the original author notes it must not be edited
    messages=messages,
    temperature=0.7,
    max_tokens=1024,
    n=1,
)

# Only one completion is requested (n=1), so read the first choice.
chat_content = chat_completion.choices[0].message.content
print(chat_content)

The first library of the University of Hong Kong (HKU) is the East library in the Main Building. It was already in operation by 1917 [East Library, Main Building.jpg].
