## Goal: Investigate ES 1.7 -> 7.10 upgrade for DOAJ

Prerequisites: ES cluster to talk to (docker or local)
To start the docker elasticsearch cluster, first increase virtual memory available:

temporarily with `sysctl -w vm.max_map_count=262144`
or permanently via setting:
`vm.max_map_count` to `262144` in `/etc/sysctl.conf`

(in virtualenv) `pip install elasticsearch jupyter`

https://elasticsearch-py.readthedocs.io/en/7.10.0/

In [1]:
from elasticsearch import Elasticsearch

This plugin can use more than one host, so check if it'll handle the fail-over for us

In [7]:
# For local
es_local = {'host': 'localhost', 'port': 9200}
esl = Elasticsearch([es_local])

In [2]:
# For docker cluster
es1 = {'host': 'localhost', 'port': 9201}
es2 = {'host': 'localhost', 'port': 9202}
es = Elasticsearch([es1, es2])

## Create an index with mappings

In [24]:
TEST_SETTINGS = {
  "mappings": {
    "dynamic_templates": [
      {
        "integers": {
          "match_mapping_type": "long",
          "mapping": {
            "type": "integer"
          }
        }
      },

    ]
  }
}

DOC = {
  "my_integer": 5,
  "my_string": "Some string"
}
#es.indices.create(index='test_index', body=TEST_SETTINGS)
resp =es.index(index='test_index', body=DOC)


In [25]:
# DOAJ mapping for account (default dynamic)
# Esprit creates an index the puts the mapping in 2 ops
# {'doc': {'dynamic_templates': [{'default': {'match': '*', 'match_mapping_type': 'string', 'mapping': {'type': 'multi_field', 'fields': {'{name}': {'type': '{dynamic_type}', 'index': 'analyzed', 'store': 'no'}, 'exact': {'type': '{dynamic_type}', 'index': 'not_analyzed', 'store': 'yes'}}}}}]}}
# 'http://localhost:9200/doajtest-account/_mapping/doc'

# todo: we should tailor index settings (shards, replicas) to the data

# Aliases may allow us to simplify the in_doaj stuff, e.g.
# 'aliases': {
#     'journal-public': {
#         'filter': {
#             'term': { 'admin.in_doaj': 'true' }
#         }
#     }

# OLD_DOAJ_DYNAMIC = {
#     'dynamic_templates': [
#             {
#                 'default': {
#                     'match': '*', 'match_mapping_type': 'string', 'mapping': {
#                         'type': 'multi_field', 'fields': {
#                             '{name}': {'type': '{dynamic_type}', 'index': 'analyzed', 'store': 'no'},
#                             'exact': {'type': '{dynamic_type}', 'index': 'not_analyzed', 'store': 'yes'}}
#                     }
#                 }
#             }
#         ]
#     }


CREATE_BODY = {
    'aliases': {
        'account': {}
    },
    'mappings': {
        'dynamic_templates': [
            {
                "strings": {
                    "match_mapping_type": "string",
                    "mapping": {
                        "type": "text",
                        "fields": {
                            "exact": {
                                "type": "keyword",
                                "ignore_above": 256,
                                "normalizer": "lowercase"
                            }
                        }
                    }
                }
            }
        ]
    },
    'settings': {
        'number_of_shards': 4,
        'number_of_replicas': 1
    }
}

# todo: do we want to do a check on index init that it has the correct mappings?

# Use the create index api with the mapping
es.indices.create(index='doaj-account', body=CREATE_BODY)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'doaj-account'}

## Put some data in the index

In [27]:
steve = {"api_key": "082d52e9-4e8c-4656-9884-2d7185318cf3", "last_updated": "2021-04-27T09:49:11Z", "marketing_consent": False, "id": "steve", "role": ["admin", "api"], "created_date": "2014-09-10T15:53:50Z", "password": "pbkdf2:sha256:150000$o6pVxBxY$f8c25903211437b168af63b465c283942a9192f086fa77872a72cdaef0579c91", "email": "865ba63d3ac9a0a9be7fc8687864cbbca2de0ba24c72bb896157a29aadfc89b1@example.com", "es_type": "account"}

es.create(index='doaj-account', id='steve', body=steve)


ConflictError: ConflictError(409, 'version_conflict_engine_exception', '[steve]: version conflict, document already exists (current version [1])')

Listing and managing indexes by prefix is easy:

In [10]:
resp = es.indices.get('_all')
es.indices.get('doaj-*')
#es.indices.delete('doaj-*')

{'doaj-account': {'aliases': {'account': {}},
  'mappings': {'dynamic_templates': [{'default': {'match': '*',
      'match_mapping_type': 'string',
      'mapping': {'fields': {'{name}': {'index': 'analyzed',
         'store': 'no',
         'type': '{dynamic_type}'},
        'exact': {'index': 'not_analyzed',
         'store': 'yes',
         'type': '{dynamic_type}'}},
       'type': 'multi_field'}}}]},
  'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}},
    'number_of_shards': '4',
    'provided_name': 'doaj-account',
    'creation_date': '1619963834064',
    'number_of_replicas': '1',
    'uuid': 'yJB6W_5URZGpthFRkaK62A',
    'version': {'created': '7100099'}}}}}

In [13]:
resp = es.indices.get('*')