# Enhance Microsoft Copilot with Elasticsearch

This notebook runs an API that lets you search invoices stored in Elasticsearch, and opens an ngrok tunnel to expose that API to the internet. It is based on the article [Enhance Microsoft Copilot with Elasticsearch](https://www.elastic.co/blog/enhance-microsoft-copilot-with-elasticsearch).

In [34]:
%pip install fastapi pyngrok uvicorn nest-asyncio elasticsearch==8 -q

Note: you may need to restart the kernel to use updated packages.


In [35]:
import os
import json
from getpass import getpass
from datetime import datetime

import nest_asyncio
import uvicorn

from fastapi import FastAPI, HTTPException, Query
from pyngrok import conf, ngrok

from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch

## Setup Variables

In [None]:
# Prompt for each secret without echoing it, and store it in the process
# environment so later cells can read it from os.environ.
for _env_name, _prompt in (
    ("ELASTICSEARCH_ENDPOINT", "Elastic Endpoint: "),
    ("ELASTICSEARCH_API_KEY", "Elastic Api Key: "),
    ("NGROK_AUTH_TOKEN", "Ngrok Auth Token: "),
):
    os.environ[_env_name] = getpass(_prompt)


## Elasticsearch client

In [39]:
# Index used by every cell that creates, fills, queries or deletes invoice data.
INDEX_NAME = "invoices-july-ms-demo"

# Client configured from the endpoint and API key stored in the environment.
_client = Elasticsearch(
    hosts=os.environ["ELASTICSEARCH_ENDPOINT"],
    api_key=os.environ["ELASTICSEARCH_API_KEY"],
)

# Connectivity check: print the cluster info, or report the failure and
# carry on so the rest of the notebook can still be inspected.
try:
    print(_client.info())
except Exception as exc:
    print("Elasticsearch connection failed:", exc)


{'name': 'instance-0000000001', 'cluster_name': '866835bbfb764f628f65995818b8f749', 'cluster_uuid': '3ymdk30mSx2N8ZxdJ6GgFA', 'version': {'number': '8.17.5', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '55fce5db33c28431e3a504c47d359ddbeadde69d', 'build_date': '2025-04-09T22:07:03.265074501Z', 'build_snapshot': False, 'lucene_version': '9.12.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


## Mappings

In [40]:
try:
    # The mapping is passed through the dedicated `mappings` parameter
    # instead of the deprecated catch-all `body` parameter.
    _client.indices.create(
        index=INDEX_NAME,
        mappings={
            "properties": {
                "id": {"type": "keyword"},
                "file_url": {"type": "keyword"},
                "issue_date": {"type": "date"},
                # `description` and `services.name` are copied into
                # `semantic_field`, the field the semantic search queries.
                "description": {"type": "text", "copy_to": "semantic_field"},
                "services": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "text",
                            "copy_to": "semantic_field",
                        },
                        "price": {"type": "float"},
                    },
                },
                "total_amount": {
                    "type": "float",
                },
                "semantic_field": {"type": "semantic_text"},
            }
        },
    )

    print("index created successfully")
except Exception as e:
    # Not every exception carries a structured `info` payload (for example,
    # connection failures), so fall back to the plain message rather than
    # raising a second error from inside the handler.
    try:
        reason = e.info["error"]["root_cause"][0]["reason"]
    except (AttributeError, KeyError, IndexError, TypeError):
        reason = str(e)
    print(f"Error creating index: {reason}")

  _client.indices.create(


index created successfully


## Ingesting documents to Elasticsearch

In [41]:
# Load the sample invoices from the JSON file in the current working directory.
with open("invoices_data.json", mode="r", encoding="utf-8") as invoices_file:
    invoices = json.loads(invoices_file.read())

In [42]:
def build_data():
    """Yield one bulk action per loaded invoice, targeting ``INDEX_NAME``."""
    yield from ({"_index": INDEX_NAME, "_source": invoice} for invoice in invoices)


# The recorded run of this cell ended in "Connection timed out"; presumably
# populating the `semantic_text` field makes the bulk request slow, so give it
# a more generous per-request timeout. NOTE(review): tune for your deployment.
BULK_REQUEST_TIMEOUT = 120  # seconds

try:
    # raise_on_error=False makes the helper return per-document failures in
    # `errors` instead of raising, so the report below can actually run.
    success, errors = bulk(
        _client.options(request_timeout=BULK_REQUEST_TIMEOUT),
        build_data(),
        raise_on_error=False,
    )
    print(f"{success} documents indexed successfully")

    if errors:
        print("Errors during indexing:", errors)

except Exception as e:
    # Transport-level failures (timeouts, connection errors) still land here.
    print(f"Error: {str(e)}, please wait some seconds and try again.")

Error: Connection timed out, please wait some seconds and try again.


## Building API

In [45]:
# FastAPI application object; the search routes are registered on it.
app = FastAPI()

In [46]:
@app.get("/search/semantic")
async def search_semantic(query: str = Query(None)):
    """Run a semantic search over the invoices index.

    Args:
        query: Free-text search string, matched against ``semantic_field``.

    Returns:
        A list with one dict per hit: the document source plus a ``score``
        key holding the hit's ``_score``.

    Raises:
        HTTPException: 400 when ``query`` is missing or empty; 500 when the
            search or the processing of its response fails.
    """
    if not query:
        raise HTTPException(
            status_code=400, detail="The 'query' parameter is required."
        )

    try:
        result = _client.search(
            index=INDEX_NAME,
            query={
                "semantic": {
                    "field": "semantic_field",
                    "query": query,
                }
            },
        )

        hits = result["hits"]["hits"]
        return [{"score": hit["_score"], **hit["_source"]} for hit in hits]
    except Exception as e:
        # Raise instead of returning the exception object, so the caller
        # receives an error status rather than a successful response.
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e


@app.get("/search/by-date")
async def search_by_date(from_date: str = Query(None), to_date: str = Query(None)):
    """Return the invoices whose ``issue_date`` lies between two dates.

    Args:
        from_date: Lower bound, formatted as ``MM/DD/YYYY hh:mm:ss AM|PM``.
        to_date: Upper bound, in the same format.

    Returns:
        A list with the ``_source`` of every matching document.

    Raises:
        HTTPException: 400 when a date is missing or does not match the
            expected format; 500 when the search or the processing of its
            response fails.
    """
    if not from_date or not to_date:
        raise HTTPException(
            status_code=400,
            detail="Both 'from_date' and 'to_date' parameters are required.",
        )

    input_format = "%m/%d/%Y %I:%M:%S %p"
    try:
        from_dt = datetime.strptime(from_date, input_format)
        to_dt = datetime.strptime(to_date, input_format)
    except ValueError as e:
        # A malformed date is a client error, not a server failure.
        raise HTTPException(status_code=400, detail=f"Error: {str(e)}") from e

    # Only the calendar date is sent to Elasticsearch; the time of day is dropped.
    formatted_from = from_dt.strftime("%d/%m/%Y")
    formatted_to = to_dt.strftime("%d/%m/%Y")

    try:
        result = _client.search(
            index=INDEX_NAME,
            query={
                "range": {
                    "issue_date": {
                        "gte": formatted_from,
                        "lte": formatted_to,
                        "format": "dd/MM/yyyy",
                    }
                }
            },
        )

        hits = result["hits"]["hits"]
        return [hit["_source"] for hit in hits]
    except Exception as e:
        # Raise instead of returning the exception object, so the caller
        # receives an error status rather than a successful response.
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e

## Running the API

In [47]:
# Single source of truth for the port shared by the tunnel and the server.
API_PORT = 8000

conf.get_default().auth_token = os.environ["NGROK_AUTH_TOKEN"]
ngrok_tunnel = ngrok.connect(API_PORT)

print("Public URL:", ngrok_tunnel.public_url)

# Patch asyncio before starting the server from inside the notebook.
# NOTE(review): presumably needed because the kernel already runs an event loop.
nest_asyncio.apply()
try:
    # Blocks until the server is stopped (for example, by interrupting the kernel).
    uvicorn.run(app, port=API_PORT)
finally:
    # Close the public tunnel so the API is not left exposed after shutdown.
    ngrok.disconnect(ngrok_tunnel.public_url)

Public URL: https://ae69453817fe.ngrok-free.app


INFO:     Started server process [4459]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     2405:201:e007:4814:d8e8:8aae:e57f:3eff:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     2405:201:e007:4814:d8e8:8aae:e57f:3eff:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     2405:201:e007:4814:d8e8:8aae:e57f:3eff:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     20.192.168.96:0 - "GET /search/semantic?query=top%20spent%20invoices HTTP/1.1" 200 OK
INFO:     20.192.168.96:0 - "GET /search/semantic?query=top%20spent%20invoices%20for%20dinner HTTP/1.1" 200 OK
INFO:     20.192.168.96:0 - "GET /search/semantic?query=Electronics%20and%20android%20services HTTP/1.1" 200 OK
INFO:     20.192.168.96:0 - "GET /search/by-date?from_date=04%2F01%2F2025%2012%3A00%3A00%20AM&to_date=04%2F30%2F2025%2011%3A59%3A59%20PM HTTP/1.1" 200 OK
INFO:     20.192.168.96:0 - "GET /search/semantic?query=highest%20spent%20invoice HTTP/1.1" 200 OK
INFO:     20.192.168.96:0 - "GET /search/by-date?from_date=04%2F01%2F2025%2012%3A00%3A00%20AM&to_date=04%2F30%2F2025%2011%3A59%3A59%20PM HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [4459]


## Delete the index

In [None]:
def print_results(results):
    """Print a short summary of an index-deletion response.

    Prints ``DELETED successfully.`` when the response is acknowledged, and
    ``ERROR: <reason>`` when it contains an ``error`` entry. The reason is
    taken from ``error.root_cause[0].reason`` when present; otherwise it
    falls back to ``error.reason`` or to the raw error value, so unexpected
    error shapes are reported rather than raising.

    Args:
        results: Mapping-like response body (must support ``.get`` and ``in``).
    """
    if results.get("acknowledged", False):
        print("DELETED successfully.")

    if "error" in results:
        error = results["error"]
        try:
            reason = error["root_cause"][0]["reason"]
        except (KeyError, IndexError, TypeError):
            # No structured root cause: use whatever detail is available.
            reason = error.get("reason", error) if isinstance(error, dict) else error
        print(f"ERROR: {reason}")


# Cleanup - Delete Index
# 400/404 responses are tolerated so the cell can be re-run when the index is
# already gone. `.options(ignore_status=...)` replaces the per-request `ignore`
# argument, which is deprecated in the 8.x client.
result = _client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)
print_results(result)