# Elasticsearch - using the low-level library
Rupert Thomas, October 2020  

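Using the low-level Python client (`elasticsearch-py`): https://elasticsearch-py.readthedocs.io/  
The higher-level `elasticsearch-dsl` library, built on top of it, is documented at: https://elasticsearch-dsl.readthedocs.io/en/latest/index.html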


In [1]:
from pprint import pprint

from datetime import datetime
from elasticsearch import Elasticsearch

In [2]:
# Hosts handed to the client: a single entry, the hostname "elasticsearch".
ES_HOSTS = ['elasticsearch']

# Client object reused by every cell below.
es = Elasticsearch(hosts=ES_HOSTS)

### Add new record, automatically create an index

In [3]:
# Document to store; the timestamp is taken at the moment the cell runs.
doc = dict(
    author='kimchy',
    text='Elasticsearch: cool. bonsai cool.',
    timestamp=datetime.now(),
)

# The id is fixed to 1, so every run of this cell targets the same document.
res = es.index(index="test-index", id=1, body=doc)

# Report what the write did (the 'result' field of the response).
outcome = res['result']
print(outcome)

updated


### Get record back

In [4]:
# Fetch the document stored under id 1 and show only its body ('_source'),
# leaving out the rest of the response.
res = es.get(index="test-index", id=1)
stored_doc = res['_source']
pprint(stored_doc)

{'author': 'kimchy',
 'text': 'Elasticsearch: cool. bonsai cool.',
 'timestamp': '2020-11-04T15:56:55.013872'}


### Add another document

In [5]:
# No id is passed this time; the response printed below carries the '_id'
# that was assigned to the new document.
doc = dict(
    author='Dave Smith',
    text='Linux expert',
    timestamp=datetime.now(),
)

res = es.index(index="test-index", body=doc)

# Show the whole response rather than only res['result'].
pprint(res)

{'_id': 'TI75k3UBztxdYHj6rTb1',
 '_index': 'test-index',
 '_primary_term': 1,
 '_seq_no': 7,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': '_doc',
 '_version': 1,
 'result': 'created'}


In [6]:
# Same author and text as the previous cell, indexed once more without an id;
# only the timestamp differs between the two documents.
doc = dict(
    author='Dave Smith',
    text='Linux expert',
    timestamp=datetime.now(),
)

res = es.index(index="test-index", body=doc)

# Show the whole response rather than only res['result'].
pprint(res)

{'_id': 'TY75k3UBztxdYHj6rjYw',
 '_index': 'test-index',
 '_primary_term': 1,
 '_seq_no': 8,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': '_doc',
 '_version': 1,
 'result': 'created'}


### Get all records

In [7]:
# Refresh the index before querying it.
es.indices.refresh(index="test-index")

# match_all: no filtering, every document in the index is a candidate.
match_all_query = {"query": {"match_all": {}}}
res = es.search(index="test-index", body=match_all_query)

found = res['hits']
print("Got %d Hits:" % found['total']['value'])

# NOTE(review): no `size` is given, so only the hits contained in this response
# are listed (Elasticsearch's default page size is 10) - the total printed above
# can be larger than the number of lines below. TODO confirm if that matters here.
for hit in found['hits']:
    record = hit["_source"]
    print("%(timestamp)s %(author)s: %(text)s" % record)

Got 7 Hits:
2020-11-04T11:59:52.190634 Dave Smith: Linux expert
2020-11-04T12:05:20.613194 Dave Smith: Linux expert
2020-11-04T15:52:25.286850 Dave Smith: Linux expert
2020-11-04T15:52:25.323666 Dave Smith: Linux expert
2020-11-04T15:56:55.013872 kimchy: Elasticsearch: cool. bonsai cool.
2020-11-04T15:56:55.155703 Dave Smith: Linux expert
2020-11-04T15:56:55.211527 Dave Smith: Linux expert


## Query records
### Broad search

In [8]:
# Refresh the index before querying it.
es.indices.refresh(index="test-index")

# query_string search for "Dave" with no field specified.
broad_query = {"query": {"query_string": {"query": "Dave"}}}
res = es.search(index="test-index", body=broad_query)

found = res['hits']
print("Got %d Hits:" % found['total']['value'])

# One line per returned hit, built from the stored document fields.
for hit in found['hits']:
    record = hit["_source"]
    print("%(timestamp)s %(author)s: %(text)s" % record)

Got 6 Hits:
2020-11-04T11:59:52.190634 Dave Smith: Linux expert
2020-11-04T12:05:20.613194 Dave Smith: Linux expert
2020-11-04T15:52:25.286850 Dave Smith: Linux expert
2020-11-04T15:52:25.323666 Dave Smith: Linux expert
2020-11-04T15:56:55.155703 Dave Smith: Linux expert
2020-11-04T15:56:55.211527 Dave Smith: Linux expert


### Specific fields

In [9]:
# Refresh the index before querying it.
es.indices.refresh(index="test-index")

# match query restricted to a single field: "Dave" is looked up in `author` only.
author_query = {"query": {"match": {"author": "Dave"}}}
res = es.search(index="test-index", body=author_query)

found = res['hits']
print("Got %d Hits:" % found['total']['value'])

# One line per returned hit, built from the stored document fields.
for hit in found['hits']:
    record = hit["_source"]
    print("%(timestamp)s %(author)s: %(text)s" % record)

Got 6 Hits:
2020-11-04T11:59:52.190634 Dave Smith: Linux expert
2020-11-04T12:05:20.613194 Dave Smith: Linux expert
2020-11-04T15:52:25.286850 Dave Smith: Linux expert
2020-11-04T15:52:25.323666 Dave Smith: Linux expert
2020-11-04T15:56:55.155703 Dave Smith: Linux expert
2020-11-04T15:56:55.211527 Dave Smith: Linux expert


### Fuzzy match

See: https://medium.com/@neelambuj2/an-approach-to-highly-intuitive-fuzzy-search-in-elasticsearch-with-typo-handling-exact-matches-a79a795d36f8

In [10]:
# Bool query with two "should" clauses over the same pair of fields:
#   1. a phrase-type multi_match carrying a boost of 10 (the "exact" clause);
#   2. a most_fields multi_match with fuzziness "AUTO" (the typo-tolerant clause).
# Note that the two clauses look for different terms: "David" in the phrase
# clause and "Linus" in the fuzzy clause.
search_fields = ["author", "text"]

exact_phrase_clause = {
    "multi_match": {
        "query": "David",
        "type": "phrase",
        "fields": list(search_fields),
        "boost": 10
    }
}

fuzzy_clause = {
    "multi_match": {
        "query": "Linus",
        "type": "most_fields",
        "fields": list(search_fields),
        "fuzziness": "AUTO"
    }
}

query_desc = {
    "query": {
        "bool": {
            "should": [exact_phrase_clause, fuzzy_clause]
        }
    }
}

# Refresh the index before querying it.
es.indices.refresh(index="test-index")

res = es.search(index="test-index", body=query_desc)

found = res['hits']
print("Got %d Hits:" % found['total']['value'])

# One line per returned hit, built from the stored document fields.
for hit in found['hits']:
    record = hit["_source"]
    print("%(timestamp)s %(author)s: %(text)s" % record)

Got 6 Hits:
2020-11-04T11:59:52.190634 Dave Smith: Linux expert
2020-11-04T12:05:20.613194 Dave Smith: Linux expert
2020-11-04T15:52:25.286850 Dave Smith: Linux expert
2020-11-04T15:52:25.323666 Dave Smith: Linux expert
2020-11-04T15:56:55.155703 Dave Smith: Linux expert
2020-11-04T15:56:55.211527 Dave Smith: Linux expert


In [11]:
# Dump the complete raw response held in `res` (shard summary plus every hit
# with its metadata such as '_id' and '_score'), not just the '_source' bodies.
pprint(res)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': 'SI4gk3UBztxdYHj6pzaA',
                    '_index': 'test-index',
                    '_score': 0.28353634,
                    '_source': {'author': 'Dave Smith',
                                'text': 'Linux expert',
                                'timestamp': '2020-11-04T11:59:52.190634'},
                    '_type': '_doc'},
                   {'_id': 'SY4lk3UBztxdYHj6qjZn',
                    '_index': 'test-index',
                    '_score': 0.28353634,
                    '_source': {'author': 'Dave Smith',
                                'text': 'Linux expert',
                                'timestamp': '2020-11-04T12:05:20.613194'},
                    '_type': '_doc'},
                   {'_id': 'So71k3UBztxdYHj6jzbI',
                    '_index': 'test-index',
                    '_score': 0.28353634,
                    '_source': {'author': 'Dave Smith',
           