### 1) Install and import necessary packages

In [None]:
# Standard imports
import openai  
import requests
import json
import os
import math
import base64
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

from io import BytesIO
from PIL import Image
from tenacity import retry, stop_after_attempt, wait_random_exponential
from dotenv import load_dotenv

# Azure Credentials
from azure.core.credentials import AzureKeyCredential

# Azure Cognitive Search imports
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorFilterMode, RawVectorQuery
from azure.search.documents.indexes.models import (  
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswVectorSearchAlgorithmConfiguration,
    VectorSearchAlgorithmKind,
    HnswParameters,
    ExhaustiveKnnVectorSearchAlgorithmConfiguration,
    ExhaustiveKnnParameters,
    VectorSearchProfile,
    SearchIndex
) 

In [None]:
# Central variables for the image search demo.
# Abort early when the .env file could not be loaded, since the settings
# below are read from the environment.
if not load_dotenv('./mydotenv.env'):
    raise Exception(".env file not found")

# Azure OpenAI (replace the placeholders with the values of your resource)
# NOTE(review): nothing visible in this file configures the `openai` module
# itself (api_key / api_base / api_type) that get_text_embeddings relies on;
# confirm it is configured through the environment or elsewhere.
api_base = '<your_azure_openai_endpoint>'
deployment_name = '<your_deployment_name>'
API_KEY = '<your_azure_openai_key>'

# Drop a trailing slash first so the URL is well-formed whether or not the
# configured endpoint ends with "/".
base_url = f"{api_base.rstrip('/')}/openai/deployments/{deployment_name}"

# Azure Computer Vision
key = os.getenv("AZURE_CV_KEY")
endpoint = os.getenv("AZURE_CV_ENDPOINT")

# Azure Cognitive Search
cs_key = os.getenv("COG_SEARCH_ADMIN_KEY")
cs_endpoint = os.getenv("COG_SEARCH_ENDPOINT")


### 2) Helper Functions

In [None]:

# Read an image from disk and return it as base64 text
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode; the base64 bytes are decoded as UTF-8
    so the result can be embedded directly in a JSON payload.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')




# HTTP headers sent by get_image_description: JSON body, key in "api-key"
headers = {"Content-Type": "application/json"}
headers["api-key"] = API_KEY


# Function to get the Image description for the input image using GPT4-vision Chat Completion API
def get_image_description(image_path, prompt, api_version="2023-12-01-preview", timeout=120):
    """Ask the GPT-4 Vision deployment to describe an image.

    Parameters
    ----------
    image_path : str
        Path of the local image file to send.
    prompt : str
        Instruction for the model; a trailing "?" is appended before sending.
    api_version : str
        Value of the ``api-version`` query parameter of the request.
    timeout : float
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    str
        The ``content`` of the first choice in the chat completion response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status code.
    """
    import mimetypes  # stdlib; imported locally so the block is self-contained

    base64_image = encode_image(image_path=image_path)

    # Label the data URL with the real image type (PNG files are processed
    # too); fall back to JPEG when the type cannot be guessed from the name.
    mime_type = mimetypes.guess_type(image_path)[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
          {
            "role": "user",
            "content": [
              {
                "type": "text",
                "text": f"{prompt}?"
              },
              {
                "type": "image_url",
                "image_url": {
                  "url": f"data:{mime_type};base64,{base64_image}"
                },
              }
            ]
          }
        ],
        "max_tokens": 1000
    }

    # The "api-key" header and `base_url` are both Azure OpenAI settings, so
    # the request has to go to the Azure deployment, not api.openai.com.
    url = f"{base_url}/chat/completions?api-version={api_version}"
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)

    # Surface HTTP failures directly instead of a KeyError on 'choices' below
    response.raise_for_status()

    temp = response.json()
    return temp['choices'][0]['message']['content']








# Embed a piece of text (image descriptions and search queries) with text-embedding-ada-002
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=20))
def get_text_embeddings(text):
    """Return the embedding vector of ``text``.

    The call goes through ``openai.Embedding.create`` with the
    "text-embedding-ada-002" engine and is retried up to 6 attempts with
    randomized exponential backoff (between 1 and 20 seconds).
    """
    result = openai.Embedding.create(engine="text-embedding-ada-002", input=text)
    first_item = result['data'][0]
    return first_item['embedding']









def _next_png_index(savedir):
    """Return one more than the highest numeric ``<number>.png`` stem in ``savedir``."""
    highest = 0
    for name in os.listdir(savedir):
        stem, ext = os.path.splitext(name)
        # Only purely numeric stems take part in the numbering; any other
        # PNG in the directory is ignored instead of crashing int().
        if ext == '.png' and stem.isdecimal():
            highest = max(highest, int(stem))
    return highest + 1


def show_images(images, cols=2, source='url', savedir='', show_title=False, titles=None):
    """
    Get images from URL and display them in a grid. Optionally save or retrieve images to/from local dir.

    Parameters
    ----------
    images : list
        List of image urls or local file paths.
    cols : int
        Number of columns in the grid.
    source : str
        'url' to download each entry, anything else to open it as a local file.
    savedir : str
        Directory to save downloaded images to (created if missing). Files are
        written as zero-padded, consecutively numbered PNGs. Empty string
        disables saving.
    show_title : bool
        Display a title above each image (local files only).
    titles : list, optional
        Titles to use when ``show_title`` is set, indexed like ``images``.
        When omitted, the file path is used as the title.

    Raises
    ------
    requests.HTTPError
        If downloading an image returns an error status code.
    """

    if savedir != '':
        os.makedirs(savedir, exist_ok=True)

    # Nothing to draw: a grid with zero rows would give a zero-height figure
    if len(images) == 0:
        return

    rows = int(math.ceil(len(images) / cols))

    fig = plt.figure(figsize=(cols * 5, rows * 5)) # specifying the overall grid size. TODO: 7,5 for landscape images

    # Scan the directory once and count upwards from the highest existing
    # index instead of re-listing it for every image.
    saving = source == 'url' and savedir != ''
    next_index = _next_png_index(savedir) if saving else 0

    for i, image_url in enumerate(images):
        plt.subplot(rows, cols, i + 1)

        if source == 'url':
            response = requests.get(image_url, timeout=30)
            # Fail with the HTTP error rather than an image decoding error
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))

            # save images if savedir is specified
            if savedir != '':
                fp = os.path.join(savedir, f'{next_index:03d}.png')
                img.save(fp, 'PNG')
                next_index += 1

        else:
            img = Image.open(image_url) # local file
            if show_title:
                if titles is None:
                    plt.title(image_url)
                else:
                    plt.title(titles[i])

        plt.imshow(img)
        plt.axis('off')

    fig.tight_layout()

    plt.show()

### 3) Data preparation

In [None]:

# Specify the directory where images are stored
image_root = './images - Copy'

if not os.path.exists(image_root):
    raise FileNotFoundError(f"Directory '{image_root}' not found")

# Adjust the delay time
delay_between_requests = 2  # seconds

# The same prompt is sent for every image
prompt = "Describe this image to me in detail"

# Describe and embed every image file in the directory. Rows are collected in
# a list and turned into a DataFrame once at the end. The listing is sorted so
# the generated document ids are reproducible between runs.
rows = []
for file in sorted(os.listdir(image_root)):
    # Match extensions case-insensitively (.PNG, .JPG, .jpeg, ...)
    if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    full_path = os.path.join(image_root, file)

    # Fetch Image_description for the image
    image_description = get_image_description(image_path=full_path, prompt=prompt)

    # Only request embeddings when a description was actually obtained
    if image_description:
        embedding = get_text_embeddings(image_description)
        if embedding is not None:
            rows.append([full_path, image_description, embedding])

    # Pause between images to space out the API requests
    time.sleep(delay_between_requests)

# Build the DataFrame
df = pd.DataFrame(rows, columns=['Image_file', 'Image_description', 'description_embeddings'])

if df.empty:
    # Without this check df.sample(1) fails with a much less helpful message
    raise ValueError(f"No image descriptions were generated from '{image_root}'")

# Display top 6 records and one random image
display(df.head(6))
sample = df.sample(1)
show_images(images=[sample['Image_file'].values[0]], source='local')

# Transform the DataFrame and create a dictionary of data content
# The row index, as a string, is stored as each document's "id"
df["id"] = df.index.astype(str)
docs = df.to_dict('records')


### 4) Create the Azure Cognitive Search index from the DataFrame content

In [None]:
# Azure credentials
if not cs_key or not cs_endpoint:
    # AzureKeyCredential / SearchIndexClient would fail on None anyway;
    # say which settings are missing instead.
    raise ValueError("COG_SEARCH_ADMIN_KEY and COG_SEARCH_ENDPOINT must be set in the environment")
credential = AzureKeyCredential(cs_key)

# Index name (lowercase letters, digits and dashes; underscores are not accepted in index names)
index_name = "img-search"

# Vector size of the stored embeddings: text-embedding-ada-002 returns
# 1536-dimensional vectors, and the index field must declare the same size.
embedding_dimensions = 1536

# Create a search index
index_client = SearchIndexClient(endpoint=cs_endpoint, credential=credential)

fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="Image_file", type=SearchFieldDataType.String),
    SearchableField(name="Image_description", type=SearchFieldDataType.String),
    SearchField(
        name="description_embeddings",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=embedding_dimensions,
        vector_search_profile="myHnswProfile",
    ),
]

# Configure the vector search configuration: two algorithms (HNSW and
# exhaustive KNN, both with cosine distance) and one profile for each.
# The embeddings field above uses the HNSW profile.
vector_search = VectorSearch(
    algorithms=[
        HnswVectorSearchAlgorithmConfiguration(
            name="myHnsw",
            kind=VectorSearchAlgorithmKind.HNSW,
            parameters=HnswParameters(
                m=4,
                ef_construction=400,
                ef_search=500,
                metric="cosine",
            ),
        ),
        ExhaustiveKnnVectorSearchAlgorithmConfiguration(
            name="myExhaustiveKnn",
            kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
            parameters=ExhaustiveKnnParameters(
                metric="cosine",
            ),
        ),
    ],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm="myHnsw",
        ),
        VectorSearchProfile(
            name="myExhaustiveKnnProfile",
            algorithm="myExhaustiveKnn",
        ),
    ],
)

# Create (or update) the search index with the fields and vector search settings
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

# Populate the index
search_client = SearchClient(endpoint=cs_endpoint, index_name=index_name, credential=credential)
if docs:
    result = search_client.upload_documents(docs)
    # Each returned item reports whether its own document was indexed, so
    # count the successes instead of assuming every document made it.
    uploaded = sum(1 for item in result if item.succeeded)
    print(f"Uploaded {uploaded} of {len(docs)} documents")
else:
    print("No documents to upload")

### 5) Search Results

In [None]:
query = "How to set Desired Speed?"

search_client = SearchClient(cs_endpoint, index_name, credential=credential)

# Pure vector search: embed the query text and ask for the 5 nearest
# neighbours on the description embeddings field.
vector_query = RawVectorQuery(vector=get_text_embeddings(query), k=5, fields="description_embeddings")

# NOTE(review): no `filter` is passed, so vector_filter_mode presumably has
# nothing to act on here - confirm before relying on it.
results = search_client.search(
    search_text=None,
    vector_filter_mode=VectorFilterMode.PRE_FILTER,
    vector_queries=[vector_query],
    select=['Image_file', 'Image_description'],
    top=5,
)

# Print each hit and collect the image paths for display
img_paths = []
for result in results:
    print(f"Title: {result['Image_file']}")
    print(f"Score: {result['@search.score']}")
    img_paths.append(result['Image_file'])

# An empty result set would make show_images build a figure with zero rows
if img_paths:
    show_images(images=img_paths, cols=5, source='local')
else:
    print("No matching images found")