### Demo for basic elasticsearch use
* delete index
* create index
* insert data into index
* query data by search terms

In [1]:
from elasticsearch.client import Elasticsearch

client = Elasticsearch([{"host": "localhost", "port": 9200}])

In [2]:
# delete test-index
client.indices.delete("*", ignore=[400, 404])  # you can pass these ignore flags if you do not care or know if the index exists
print(client.indices.get_alias("*"))  # show all indices

{}




In [3]:
# create new test index
client.indices.create("test-index")

# check if it exists
client.indices.exists("test-index")

True

In [4]:
# delete it
client.indices.delete("test-index")
client.indices.exists("test-index")

False

In [5]:
# some dummy data
data = [{"title": "wicked sentence", "text": "The quick brown fox jumps over the lazy dog."},
        {"title": "confusing statement", "text": "The boy saw the man with the telescope."}]

In [6]:
# when using client.index() for indexing data, a new index is automatically created if it does not exist
for id, document in enumerate(data):
    client.index("test-index", id=id, body=document)

print(client.indices.get_alias("*"))

{'test-index': {'aliases': {}}}


In [7]:
# get a document by id
res = client.get("test-index", id=1)
res

{'_index': 'test-index',
 '_type': '_doc',
 '_id': '1',
 '_version': 1,
 '_seq_no': 1,
 '_primary_term': 1,
 'found': True,
 '_source': {'title': 'confusing statement',
  'text': 'The boy saw the man with the telescope.'}}

In [8]:
# access the data in the response
res["_source"]

{'title': 'confusing statement',
 'text': 'The boy saw the man with the telescope.'}

### searching

In [9]:
# create query

## match query - the standard

body = {
    "from": 0,  # number of hits to skip, default 0
    "size": 2,  # the maximum number of hits to return, default 10
    "query": {
        "match": {
            "text": "Telescope"
        }
    }
}

res = client.search(index="test-index", body=body)
res

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [10]:
for hit in res["hits"]["hits"]:
    print(f"ID: {hit['_id']}, SCORE: {hit['_score']}, SOURCE: {hit['_source']} \n")

In [11]:
body = {
    "from": 0,
    "size": 2,
    "query": {
        "match": {
            "text": "Telescope fox"  # not all terms need to match
        }
    }
}

res = client.search(index="test-index", body=body)
res

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [12]:
# match-phrase query: more restrictive
# must match phrase, all terms need to match

body = {
    "from": 0,
    "size": 2,
    "query": {
        "match_phrase": {
            "text": "man with-the telescope"
        }
    }
}

res = client.search(index="test-index", body=body)
res

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [13]:
# combine queries using bool
# must, must_not, should.
# should is less restrictive, like logical or, must is like logical and

body = {
    "from": 0,
    "size": 2,
    "query": {
        "bool": {
            "must_not": {
                "match": {
                    "title": "sentence"  # title field must not contain "sentence"
                }
            },
            "should": {
                "match": {
                    "text": "telescope fox"
                }
            }
        }
    }
}

res = client.search(index="test-index", body=body)
res

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [14]:
# search using regex

body = {
    "query": {
        "regexp": {
            "text": ".*"  # match everything
        }
    }
}

res = client.search(index="test-index", body=body)
res

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [15]:
# search using a query string
# Note that with query_string you may use wildcards, which you can't with match

body = {
    "query": {
        "query_string": {
            "query": "saw telescope"
        }
    }
}

res = client.search(index="test-index", body=body)
res

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [18]:
from src.querying import search

res = search(client=client, index="test-index", query_string="saw telescope")
res

{'took': 2,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.420477,
  'hits': [{'_index': 'test-index',
    '_type': '_doc',
    '_id': '1',
    '_score': 1.420477,
    '_source': {'title': 'confusing statement',
     'text': 'The boy saw the man with the telescope.'}}]}}