Prerequisites: ES cluster to talk to (docker or local)
To start the docker elasticsearch cluster, first increase virtual memory available:

temporarily with `sysctl -w vm.max_map_count=262144`
or permanently via setting:
`vm.max_map_count` to `262144` in `/etc/sysctl.conf`

then:

    docker-compose up

(in virtualenv) `pip install elasticsearch jupyter`

In [41]:
from datetime import datetime
from uuid import uuid4
from copy import deepcopy

import elasticsearch
es1 = {'host': 'localhost', 'port': 9201}
es2 = {'host': 'localhost', 'port': 9202}
es = elasticsearch.Elasticsearch([es1, es2])

### Check we have redundancy:

Notice we talk to both hosts

In [8]:
for i in range(0, 9):
    print(es.info(pretty=True))

{'name': 'es01', 'cluster_name': 'es-docker-cluster', 'cluster_uuid': 'y2M8TuB0RhGkyhL5UZ_X6A', 'version': {'number': '7.10.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '51e9d6f22758d0374a0f3f5c6e8f3a7997850f96', 'build_date': '2020-11-09T21:30:33.964949Z', 'build_snapshot': False, 'lucene_version': '8.7.0', 'minimum_wire_compatibility_version': '6.8.0', 'minimum_index_compatibility_version': '6.0.0-beta1'}, 'tagline': 'You Know, for Search'}
{'name': 'es02', 'cluster_name': 'es-docker-cluster', 'cluster_uuid': 'y2M8TuB0RhGkyhL5UZ_X6A', 'version': {'number': '7.10.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '51e9d6f22758d0374a0f3f5c6e8f3a7997850f96', 'build_date': '2020-11-09T21:30:33.964949Z', 'build_snapshot': False, 'lucene_version': '8.7.0', 'minimum_wire_compatibility_version': '6.8.0', 'minimum_index_compatibility_version': '6.0.0-beta1'}, 'tagline': 'You Know, for Search'}
{'name': 'es01', 'cluster_name': 'es-docker-cluster', 'clu

Now stop one of the hosts (in terminal)

    docker stop es01

Then run the cell above again. This time it seamlessly connects to es02 each time.
Restart the container.

    docker start es01

In [66]:
# Clear up previous run
es.indices.delete('doaj-*') if es.indices.get('doaj-*') else print('Nothing to do')

{'acknowledged': True}

### Creating an index with custom dynamic mapping

The dynamic mappings have changed somewhat since ES 1.7. For reference, here's the old
default dynamic mapping:
```
'dynamic_templates': [
            {
                'default': {
                    'match': '*', 'match_mapping_type': 'string', 'mapping': {
                        'type': 'multi_field', 'fields': {
                            '{name}': {'type': '{dynamic_type}', 'index': 'analyzed', 'store': 'no'},
                            'exact': {'type': '{dynamic_type}', 'index': 'not_analyzed', 'store': 'yes'}}
                    }
                }
            }
        ]
    }
```
The following gives us an equivalent `.exact` not_analyzed **keyword** field.

In [67]:
CREATE_BODY = {
    'aliases': {
        'account': {}
    },
    'mappings': {
        'dynamic_templates': [
            {
                "strings": {
                    "match_mapping_type": "string",
                    "mapping": {
                        "type": "text",
                        "fields": {
                            "exact": {
                                "type": "keyword",
                                "normalizer": "lowercase"
                            }
                        }
                    }
                }
            }
        ]
    },
    'settings': {
        'number_of_shards': 4,
        'number_of_replicas': 1
    }
}

# todo: do we want to do a check on index init that it has the correct mappings?

# Use the create index api with the mapping
es.indices.create(index='doaj-account', body=CREATE_BODY)


{'acknowledged': True, 'shards_acknowledged': True, 'index': 'doaj-account'}

### Put some data in the index

In [68]:
steve = {"api_key": uuid4().hex, "last_updated": "2021-04-27T09:49:11Z", "marketing_consent": False, "id": "steve", "role": ["admin", "api"], "created_date": "2014-09-10T15:53:50Z", "password": "pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91", "email": "steve@example.com", "es_type": "account"}
bob =  {"api_key": uuid4().hex, "last_updated": "2021-04-27T09:49:11Z", "marketing_consent": False, "id": "bob", "role": ["publisher", "api"], "created_date": "2014-09-10T15:53:50Z", "password": "pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91", "email": "bob@example.com", "es_type": "account"}

es.create(index='doaj-account', id='steve', body=steve)
es.create(index='doaj-account', id='bob', body=bob)

{'_index': 'doaj-account',
 '_type': '_doc',
 '_id': 'bob',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 2, 'failed': 0},
 '_seq_no': 1,
 '_primary_term': 1}

In [38]:
# An additional create will cause a 409 conflict
try:
    es.create(index='doaj-account', id='steve', body=steve)
except elasticsearch.ConflictError as e:
    print(e)

ConflictError(409, 'version_conflict_engine_exception', '[steve]: version conflict, document already exists (current version [4])')


In [40]:
# With correct method es.index
steve['last_updated'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
es.index(index='doaj-account', id='steve', body=steve)

{'_index': 'doaj-account',
 '_type': '_doc',
 '_id': 'steve',
 '_version': 5,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 2, 'failed': 0},
 '_seq_no': 5,
 '_primary_term': 1}

### Elasticsearch concurrency control (save validation)
https://www.elastic.co/guide/en/elasticsearch/reference/7.10/optimistic-concurrency-control.html

Ensure we reject changes when a document has been saved in interim.

In [52]:
bob_retrieved = es.get('doaj-account', id='bob')
bob_retrieved

{'_index': 'doaj-account',
 '_type': '_doc',
 '_id': 'bob',
 '_version': 2,
 '_seq_no': 6,
 '_primary_term': 1,
 'found': True,
 '_source': {'api_key': '92d022188e604648b8f1c69f20ff0b4c',
  'last_updated': '2021-04-27T09:49:11Z',
  'marketing_consent': False,
  'id': 'bob',
  'role': ['publisher', 'api'],
  'created_date': '2014-09-10T15:53:50Z',
  'password': 'pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91',
  'email': 'bob@example.com',
  'es_type': 'account'}}

In [53]:
# Bob's API key is updated before another user is finished updating Bob
bob_interjected = deepcopy(bob_retrieved).get('_source')
bob_interjected['api_key'] = uuid4().hex
es.index('doaj-account', id='bob', body=bob_interjected, if_seq_no=bob_retrieved['_seq_no'], if_primary_term=bob_retrieved['_primary_term'])

{'_index': 'doaj-account',
 '_type': '_doc',
 '_id': 'bob',
 '_version': 3,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 2, 'failed': 0},
 '_seq_no': 7,
 '_primary_term': 1}

In [57]:
# Then we try to carry on with our update of Bob, specifying our sequences as before
bob_retrieved['_source']['last_updated'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
try:
    es.index('doaj-account', id='bob', body=bob_retrieved['_source'], if_seq_no=bob_retrieved['_seq_no'], if_primary_term=bob_retrieved['_primary_term'])
except elasticsearch.ConflictError as e:
    print(e)

ConflictError(409, 'version_conflict_engine_exception', '[bob]: version conflict, required seqNo [6], primary term [1]. current document has seqNo [7] and primary term [1]')


In [59]:
# Try again with correct sequence:
bob_uptodate = es.get('doaj-account', id='bob')
es.index('doaj-account', id='bob', body=bob_retrieved['_source'], if_seq_no=bob_uptodate['_seq_no'], if_primary_term=bob_uptodate['_primary_term'])
es.get('doaj-account', id='bob')

{'_index': 'doaj-account',
 '_type': '_doc',
 '_id': 'bob',
 '_version': 5,
 '_seq_no': 9,
 '_primary_term': 1,
 'found': True,
 '_source': {'api_key': '92d022188e604648b8f1c69f20ff0b4c',
  'last_updated': '2021-05-03T20:34:01Z',
  'marketing_consent': False,
  'id': 'bob',
  'role': ['publisher', 'api'],
  'created_date': '2014-09-10T15:53:50Z',
  'password': 'pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91',
  'email': 'bob@example.com',
  'es_type': 'account'}}

In [69]:
# Match all search
es.search({'query': {'match_all': {}}}, index='doaj-account')


{'took': 15,
 'timed_out': False,
 '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'doaj-account',
    '_type': '_doc',
    '_id': 'steve',
    '_score': 1.0,
    '_source': {'api_key': '19e75ffcb68a4b5d9f5d8ffe4d78b6a5',
     'last_updated': '2021-04-27T09:49:11Z',
     'marketing_consent': False,
     'id': 'steve',
     'role': ['admin', 'api'],
     'created_date': '2014-09-10T15:53:50Z',
     'password': 'pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91',
     'email': 'steve@example.com',
     'es_type': 'account'}},
   {'_index': 'doaj-account',
    '_type': '_doc',
    '_id': 'bob',
    '_score': 1.0,
    '_source': {'api_key': 'd75ef6935e2b4fd3851dfc70becff610',
     'last_updated': '2021-04-27T09:49:11Z',
     'marketing_consent': False,
     'id': 'bob',
     'role': ['publisher', 'api'],
     'created_date': '2014-09

### Pull account by API key

In [72]:
q = {
    'query': {
        'term': {'api_key.exact': steve['api_key']}
    }
}

es.search(q, index='doaj-account')

#$$ md

{'took': 6,
 'timed_out': False,
 '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.6931471,
  'hits': [{'_index': 'doaj-account',
    '_type': '_doc',
    '_id': 'steve',
    '_score': 0.6931471,
    '_source': {'api_key': '19e75ffcb68a4b5d9f5d8ffe4d78b6a5',
     'last_updated': '2021-04-27T09:49:11Z',
     'marketing_consent': False,
     'id': 'steve',
     'role': ['admin', 'api'],
     'created_date': '2014-09-10T15:53:50Z',
     'password': 'pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91',
     'email': 'steve@example.com',
     'es_type': 'account'}}]}}