In [1]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,  
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
)

In [2]:
from langchain_openai import AzureOpenAIEmbeddings
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

azure_embeddings_deployment = os.getenv('AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME')
azure_oai_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
azure_oai_key = os.getenv('AZURE_OPENAI_API_KEY')

credential = DefaultAzureCredential()
azure_search_endpoint = os.getenv('AZURE_SEARCH_ENDPOINT')
azure_search_key = os.getenv('AZURE_SEARCH_KEY')


embeddings = AzureOpenAIEmbeddings(azure_deployment=azure_embeddings_deployment,
                                       azure_endpoint=azure_oai_endpoint,
                                       api_key=azure_oai_key)

index_client = SearchIndexClient(azure_search_endpoint,AzureKeyCredential(azure_search_key))

In [12]:

embedding_function = embeddings.embed_query
fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
        SearchableField(name="content",type=SearchFieldDataType.String,searchable=True),
        SearchField(name="content_vector",
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                    searchable=True,
                    vector_search_dimensions=len(embedding_function("Text")),
                    vector_search_profile_name = "myHnswProfile",  
                    vector_search_configuration="default"),
        SearchableField( name="metadata", type=SearchFieldDataType.String, searchable=True),
        
        SimpleField(name="source", type=SearchFieldDataType.String, filterable=True,)
    ]

vector_search = VectorSearch(algorithms=[HnswAlgorithmConfiguration(name="myHnsw")],
                                 profiles=[VectorSearchProfile(name="myHnswProfile", algorithm_configuration_name="myHnsw")])
    
index = SearchIndex(name="hackaton-2025", fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)


In [8]:
import glob
from langchain_community.document_loaders import PyPDFLoader, UnstructuredMarkdownLoader,TextLoader
from pathlib import Path
import pathlib

files_list = glob.glob("../api/index_documents/*")
documents = []
for file in files_list:
    file_extension = pathlib.Path(file).suffix
    #file_path = os.path.join(download_folder, file)
    if file_extension == '.pdf':
                loader = PyPDFLoader(file)
                pages = loader.load_and_split()
                documents.append(pages)
    elif file_extension in ['.txt', '.json']:
                loader = TextLoader(file)
                pages = loader.load_and_split()
                documents.append(pages)
    elif file_extension == '.md':
                loader = UnstructuredMarkdownLoader(file)
                pages = loader.load_and_split()
                documents.append(pages)
                
    



In [10]:
from azure.search.documents import SearchClient
search_client = SearchClient(azure_search_endpoint, "hackaton-2025", AzureKeyCredential(azure_search_key))

In [13]:
import json
document_batch = []
count = 0
for i in range(len(documents)):
    for k in range(len(documents[i])):
        count += 1
        content = str(documents[i][k].page_content)
        document_batch.append(
                    {
                        "id": str(count),
                        "content": str(documents[i][k].page_content),
                        "content_vector": embeddings.embed_query(str(documents[i][k].page_content)),
                        "metadata": json.dumps(documents[i][k].metadata),
                        "source": documents[i][k].metadata.get('source', 'unknown')
                    }
                )
search_client.upload_documents(documents=document_batch)

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317535cd0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a798d0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a7afd0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a78c50>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a78f90>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a79d50>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a78190>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a79c90>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a7a590>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a7b850>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x1f317a7b990>,
 <azure.search.docume

In [20]:
cc = search_client.get_document_count()


files = []
for i in range(cc):
    file = search_client.get_document(i+1)['source'].split("\\")[-1]
    if file not in files:
        files.append(file)

In [34]:
from azure.search.documents.models import VectorizedQuery
from openai import AzureOpenAI

client = AzureOpenAI(
  api_key = os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version = "2024-02-01",
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
)

model = os.getenv('AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME')

def get_context_from_aisearch(query):
        embedded_query = client.embeddings.create(input = [query], model=model).data[0].embedding
        vector_query = VectorizedQuery(vector=embedded_query, k_nearest_neighbors=5, fields="content_vector")
        response = search_client.search(search_text = None, vector_queries=[vector_query])

        full_response = ""
        for r in response:
            full_response += str(r['content']) + '\n\n'
        
        return full_response

In [36]:
print(get_context_from_aisearch("tell me bout genai in art"))

Art and the science of generative AI: A deeper dive
Ziv Epstein1∗, Aaron Hertzmann2, Laura Herman3,4, Robert Mahari1,5,
Morgan R. Frank 6, Matthew Groh1, Hope Schroeder1, Amy Smith7,
Memo Akten8, Jessica Fjeld5, Hany Farid9, Neil Leach10,
Alex “Sandy” Pentland1, Olga Russakovsky11
1MIT Media Lab
2Adobe Research
3University of Oxford
4Adobe, Inc.
5Harvard Law School
6University of Pittsburgh
7Queen Mary University London
8University of California, San Diego
9University of California, Berkeley
10Florida International University
11Princeton University
∗To whom correspondence should be addressed; E-mail: zive@mit.edu.
A new class of tools, colloquially calledgenerative AI, can produce high-quality
artistic media for visual arts, concept art, music, fiction, literature, video, and
animation. The generative capabilities of these tools are likely to fundamen-
tally alter the creative processes by which creators formulate ideas and put
them into production. As creativity is reimagined, so too 