# **2. Vector Search Configuration**


In [4]:
from dotenv import load_dotenv, dotenv_values, find_dotenv
from pathlib import Path
import json
import os
from sentence_transformers import SentenceTransformer

In [5]:
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient, SearchIndexingBufferedSender  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.models import (
    QueryAnswerType,
    QueryCaptionType,
    QueryCaptionResult,
    QueryAnswerResult,
    SemanticErrorMode,
    SemanticErrorReason,
    SemanticSearchResultsType,
    QueryType,
    VectorizedQuery,
    VectorQuery,
    VectorFilterMode,    
)
from azure.search.documents.indexes.models import (  
    ExhaustiveKnnAlgorithmConfiguration,
    ExhaustiveKnnParameters,
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    SemanticPrioritizedFields,
    SemanticField,  
    SearchField,  
    SemanticSearch,
    VectorSearch,  
    HnswAlgorithmConfiguration,
    HnswParameters,  
    VectorSearch,
    VectorSearchAlgorithmConfiguration,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    VectorSearch,
    ExhaustiveKnnParameters,
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    SemanticField,  
    SearchField,  
    VectorSearch,  
    HnswParameters,  
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    VectorSearchProfile,
)  
  




In [9]:
# Configure environment variables
#
# Locate keys.env by searching upward from the notebook's working directory
# instead of relying on a user-specific absolute path, so the notebook can run
# from any checkout. find_dotenv returns an empty string when nothing is found;
# in that case nothing is loaded and only variables already present in the
# process environment are used.
dotenv_path = find_dotenv("keys.env", usecwd=True)
if dotenv_path:
    load_dotenv(dotenv_path=dotenv_path)

service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
model = os.getenv("MODEL_NAME")

# Fail fast with an actionable message: without this check a missing key only
# shows up as an error raised from AzureKeyCredential below.
required_settings = {
    "AZURE_SEARCH_SERVICE_ENDPOINT": service_endpoint,
    "AZURE_SEARCH_INDEX_NAME": index_name,
    "AZURE_SEARCH_ADMIN_KEY": key,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing required settings: "
        + ", ".join(missing_settings)
        + ". Define them in keys.env or in the process environment."
    )

print(index_name, model)

# Key-based credential shared by the index client and the search client.
credential = AzureKeyCredential(key)

biology all-MiniLM-L6-v2


### **Fields:**

In [10]:
# Client for index-level operations; used further down to create or update
# the index definition on the search service.
index_client = SearchIndexClient(
    endpoint=service_endpoint,
    credential=credential,
)

# Bare trailing expression so the notebook shows the client's repr.
index_client

<azure.search.documents.indexes._search_index_client.SearchIndexClient at 0x18ba1622b50>

In [11]:
# Index schema: one field for each key in the input data JSON file.
fields = [
    # Document key; also enabled for filtering, sorting and faceting.
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        filterable=True,
        sortable=True,
        facetable=True,
    ),
    # Full-text searchable text of the document.
    SearchableField(
        name="line",
        type=SearchFieldDataType.String,
    ),
    # Source file name; searchable, and usable as a filter or facet.
    SearchableField(
        name="filename",
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
    ),
    # Vector field holding the embedding as a collection of single-precision
    # floats. It is bound to the "myHnswProfile" vector search profile.
    # NOTE(review): 384 presumably matches the output size of the embedding
    # model named by MODEL_NAME -- confirm if the model changes.
    SearchField(
        name="embedding",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_profile_name="myHnswProfile",
        vector_search_dimensions=384,
    ),
]

### **Vector Profiles:**
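1. `algorithms`: the named nearest-neighbour algorithm configurations (here HNSW and exhaustive KNN).
2. `profiles`: named profiles, each pointing at one algorithm configuration; a vector field selects its profile through `vector_search_profile_name`.

For more info: https://github.com/Azure/azure-search-vector-samples/blob/main/demo-python/code/basic-vector-workflow/azure-search-vector-python-sample.ipynb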

In [12]:
# Vector search configuration: two algorithm configurations (HNSW and
# exhaustive KNN, both using the cosine metric) and one profile per algorithm.
# The "embedding" field defined in the schema refers to "myHnswProfile".
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name="myHnsw",
            kind=VectorSearchAlgorithmKind.HNSW,
            parameters=HnswParameters(
                metric=VectorSearchAlgorithmMetric.COSINE,
                m=4,
                ef_construction=400,
                ef_search=500,
            ),
        ),
        ExhaustiveKnnAlgorithmConfiguration(
            name="myExhaustiveKnn",
            kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
            parameters=ExhaustiveKnnParameters(
                metric=VectorSearchAlgorithmMetric.COSINE,
            ),
        ),
    ],
    profiles=[
        # Each profile gives a field-facing name to one algorithm configuration.
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
        ),
        VectorSearchProfile(
            name="myExhaustiveKnnProfile",
            algorithm_configuration_name="myExhaustiveKnn",
        ),
    ],
)

### **Semantic Search:**

In [16]:
# Semantic configuration: "line" is used as the content field and "filename"
# as the keywords field.
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        keywords_fields=[SemanticField(field_name="filename")],
        content_fields=[SemanticField(field_name="line")],
    ),
)

In [18]:
# Wrap the semantic configuration in the index-level semantic settings.
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition from the schema, vector search and semantic
# settings built in the preceding cells.
index = SearchIndex(
    name=index_name,  # biology
    fields=fields,  # id, line, embedding, filename
    semantic_search=semantic_search,
    vector_search=vector_search,
)

# Send the definition to the service via create_or_update_index and report
# the name the service returned.
result = index_client.create_or_update_index(index)
print(f' {result.name} created')


In [38]:
# Load the documents (with their precomputed embeddings) that will be uploaded
# to the index.
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding (for example cp1252 on Windows).
with open('../output/docVectors.json', 'r', encoding='utf-8') as file:
    documents = json.load(file)

In [40]:
# Inspect the loaded documents; the bare expression displays the entire list.
# NOTE(review): each entry carries its full embedding vector, so this output is
# very long -- consider displaying a small slice instead.
documents

[{'id': '0',
  'line': '46 BIOLOGY\nWhen you look around, you will observe different animals with different\nstructures and forms.  As over a million species of animals have been\ndescribed till now, the need for classification becomes all the more\nimportant. The classification also helps in assigning a systematic position\nto newly described species.\n4',
  'embedding': [-0.012117701582610607,
   -0.03081829659640789,
   0.041076067835092545,
   -0.027501830831170082,
   0.010043774731457233,
   0.001646480173803866,
   -0.08494453877210617,
   -0.044164810329675674,
   0.07236697524785995,
   0.04246310517191887,
   -0.03732886537909508,
   -0.0975663810968399,
   -0.0752120092511177,
   0.059085238724946976,
   -0.026296483352780342,
   -0.01673942245543003,
   -0.02365313470363617,
   0.022587975487113,
   -0.020158400759100914,
   0.004874376114457846,
   0.033988457173109055,
   0.019861189648509026,
   0.030605308711528778,
   -0.014682733453810215,
   -0.11090463399887085,
   

In [None]:
# Upload the documents and report how many the service actually accepted.
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)

# upload_documents returns one IndexingResult per document. An individual
# document can be rejected without the call raising, so inspect each result
# instead of assuming that every input document was indexed.
result = search_client.upload_documents(documents)
failed_keys = [item.key for item in result if not item.succeeded]
print(f"Uploaded {len(result) - len(failed_keys)} of {len(documents)} documents")
if failed_keys:
    print(f"Failed to upload documents with keys: {failed_keys}")
