# Upload, update and delete documents in a search index

* Delete documents
* Upload documents
* Update documents


These functions were developed for an implementation on a SharePoint site.

In [3]:
import os
import uuid

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from dotenv import load_dotenv
from openai import AzureOpenAI
from tenacity import retry, wait_random_exponential, stop_after_attempt

In [4]:
# Load settings from the local "pfd_conf.env" file into the process
# environment so the os.getenv calls below can read them.
load_dotenv("pfd_conf.env")

# NOTE(review): connect_str is not referenced in the visible cells.
connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
# Passed as `endpoint` to every SearchClient created below, even though the
# variable name in the env file mentions STORAGE — confirm it holds the
# search service URL.
service_endpoint = os.getenv("AZURE_STORAGE_SERVICE_ENDPOINT")

# NOTE(review): the functions below take their own `index_name` parameter, so
# this module-level value is not used by them directly.
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
# API key wrapped in `credential` below.
key = os.getenv("AZURE_SEARCH_KEY")

# Embedding model name; used as the default `model` of generate_embeddings.
model = os.getenv("AZURE_EMBEDDINGS_MODEL")
# Credential object handed to every SearchClient created below.
credential = AzureKeyCredential(key)
# NOTE(review): endpoint and deployment are not referenced in the visible cells.
endpoint = os.getenv("AZUREOPENAI_ENDPOINT")
deployment = os.getenv("AZUREOPENAI_DEPLOYMENT")


# NOTE(review): the three names below are not referenced in the visible cells.
container_name = "containername"  
blob_name = 'original.json'  
blob_vectorname = 'combinedVector.json'



In [None]:
# Client used by generate_embeddings to request embedding vectors.
# Key, API version and endpoint are all read from environment variables.
client_embeddings = AzureOpenAI(
  api_key = os.getenv("OPENAI_EMBEDDINGS_KEY"),  
  api_version = os.getenv("OPENAI_EMBEDDINGS_VERSION"),
  azure_endpoint = os.getenv("OPENAI_EMBEDDINGS_ENDPOINT")
)

In [5]:
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(6))
def generate_embeddings(text, model=model):
    """Return the embedding vector of ``text``.

    The request goes through the module-level ``client_embeddings`` client.
    The call is wrapped by ``retry``: a failing call is retried with a
    randomised exponential wait (``min=1``, ``max=10``) and retrying stops
    after 6 attempts.

    Args:
        text: The text to embed; it is sent as a one-element input list.
        model: Embedding model name. Defaults to the module-level ``model``
            value as it was when this function was defined.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    response = client_embeddings.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding


# Perform the action on a selected index

In [7]:
def selecciona_indice(folder):
    """Map a country folder name to the name of its search index.

    Args:
        folder: Folder name. Only the exact upper-case, accented spellings
            "ARGENTINA", "MÉXICO", "ECUADOR" and "PERÚ" are recognised.

    Returns:
        A ``(found, index_name)`` tuple. ``found`` is ``True`` with the
        matching index name for a known folder, otherwise ``False`` with an
        empty string.
    """
    indices_por_carpeta = (
        ("ARGENTINA", "argentinaindex"),
        ("MÉXICO", "mexicoindex"),
        ("ECUADOR", "ecuadorindex"),
        ("PERÚ", "peruindex"),
    )
    # Compare with == (not a dict lookup) so any value, hashable or not,
    # simply falls through to the "not found" result.
    for carpeta, indice in indices_por_carpeta:
        if folder == carpeta:
            return True, indice
    return False, ""

In [8]:
def upload_or_update(folder, filename, content, index_name):
    """Upload a document to a search index, or update it if it already exists.

    The index is searched for documents whose ``filename`` field equals
    ``filename``. When a match is found, the ``id`` of the first match is
    reused so that document is overwritten; otherwise a new unique ``id`` is
    generated. Embeddings for the ``Area``, ``content`` and ``filename``
    values are computed with ``generate_embeddings`` and the document is sent
    with ``merge_or_upload_documents``.

    Args:
        folder: Value stored in the document's ``Area`` field.
        filename: Value stored in the ``filename`` field and used to look up
            an existing document.
        content: Value stored in the ``content`` field.
        index_name: Name of the index to write to.

    Returns:
        The value returned by ``SearchClient.merge_or_upload_documents``.
    """
    search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)

    # A single quote inside an OData string literal must be doubled;
    # otherwise a filename such as "O'Brien.pdf" produces an invalid filter.
    escaped_filename = filename.replace("'", "''")
    results = list(search_client.search(
        search_text=filename,
        highlight_fields="filename",
        filter=f"filename eq '{escaped_filename}'",
    ))

    if not results:
        print("Subir")
        # Use a random id instead of "document count + 1": after any deletion
        # the count-based id can equal the id of an existing document, and
        # merge_or_upload would then overwrite that unrelated document.
        document_id = uuid.uuid4().hex
    else:
        # A document with this filename already exists: reuse its id so the
        # upsert replaces it in place.
        print("Actualizar")
        document_id = results[0]["id"]

    document = {
        "id": document_id,
        "Area": folder,
        "filename": filename,
        "content": content,
    }
    # One embedding per text field.
    document["AreaVector"] = generate_embeddings(document["Area"], model)
    document["contentVector"] = generate_embeddings(document["content"], model)
    document["filenameVector"] = generate_embeddings(document["filename"], model)

    # Return the indexing result so callers can inspect failures.
    return search_client.merge_or_upload_documents(documents=[document])


In [9]:
def borrar_documento(filename, index_name):
    """Delete every document in an index whose ``filename`` field matches.

    The index is searched for documents whose ``filename`` field equals
    ``filename``. Each match is printed, all matches are deleted in a single
    ``delete_documents`` call, and the outcome for each document is printed.
    If nothing matches, "El archivo no existe" is printed instead.

    Args:
        filename: Filename of the document(s) to delete.
        index_name: Name of the index to delete from.
    """
    search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)

    # A single quote inside an OData string literal must be doubled;
    # otherwise a filename such as "O'Brien.pdf" produces an invalid filter.
    escaped_filename = filename.replace("'", "''")
    results = list(search_client.search(
        search_text=filename,
        highlight_fields="filename",
        filter=f"filename eq '{escaped_filename}'",
    ))

    if not results:
        print("El archivo no existe")
        return

    for found in results:
        print(f"Found document: {found['id']} with Field1: {found['filename']}")

    # Delete every match. The earlier version built this list from the loop
    # variable left over after the loop, so only the last match was removed.
    documents_to_delete = [{"id": found["id"]} for found in results]
    delete_results = search_client.delete_documents(documents=documents_to_delete)

    for outcome in delete_results:
        if outcome.succeeded:
            print(f"Document with ID {outcome.key} deleted successfully.")
        else:
            print(f"Failed to delete document with ID {outcome.key}. Error: {outcome.error_message}")

In [10]:
def realiza_accion(folder, filename, content=None, accion="subir"):
    """Run an upload, update or delete for one file on its indexes.

    The folder is resolved to its index with ``selecciona_indice``. The
    action is then applied first to that index and then to the index named
    "someindex".

    Args:
        folder: Folder name; must be one recognised by ``selecciona_indice``,
            otherwise "Carpeta no válida" is printed and nothing is done.
        filename: Name of the file to act on.
        content: Document text, forwarded to ``upload_or_update`` for the
            "subir" and "modificar" actions.
        accion: "subir" or "modificar" (both call ``upload_or_update``) or
            "borrar" (calls ``borrar_documento``). Any other value prints
            "Acción no válida".
    """
    es_valida, indice_carpeta = selecciona_indice(folder)
    if not es_valida:
        print("Carpeta no válida")
        return

    # Targets in the order they are processed: the folder index, then "someindex".
    indices_destino = (indice_carpeta, "someindex")

    if accion in ("subir", "modificar"):
        for destino in indices_destino:
            upload_or_update(folder, filename, content=content, index_name=destino)
    elif accion == "borrar":
        for destino in indices_destino:
            borrar_documento(filename, destino)
    else:
        print("Acción no válida")

In [27]:
# Sample inputs for a manual run of realiza_accion in the next cell.
folder = "MÉXICO"
filename = "pepe.pdf"
accion = "subir"
content = "Hola, soy pepe y soy increible"

In [28]:
# Run the sample upload. The action is passed as the literal "subir" here,
# not through the `accion` variable.
realiza_accion(folder=folder, filename=filename, content=content, accion="subir")

Subir
Subir
