In [37]:
from elasticsearch import Elasticsearch
from elasticsearch._sync.client import IndicesClient
from elasticsearch_dsl import Search
import json

In [44]:
def delete_index(cli, index_name):
    """Close and delete the index ``index_name``; do nothing if it is absent.

    Args:
        cli: Client object exposing ``indices.exists``, ``indices.close`` and
            ``indices.delete`` (an ``Elasticsearch`` client in this notebook).
        index_name: Name of the index to remove.

    Returns:
        None.
    """
    # Guard so a fresh "Restart & Run All" works: on a cluster where the index
    # was never created, close/delete would fail on the missing index.
    if not cli.indices.exists(index=index_name):
        return
    cli.indices.close(index=index_name)
    cli.indices.delete(index=index_name)

In [None]:
# Connect to the Elasticsearch node on localhost and prepare a search that
# targets the 'documents' index.
client = Elasticsearch(
    hosts='http://localhost:9008',
)
search = Search(
    using=client,
    index='documents',
)

In [12]:
# Collect every hit yielded by the scan, keyed by the hit's `id` attribute.
document_dump = {hit.id: hit.to_dict() for hit in search.scan()}

# Write the collected documents to disk as indented, non-ASCII-escaped JSON.
with open('document_dump.json', 'w', encoding='utf-8') as dump_file:
    json.dump(document_dump, dump_file, ensure_ascii=False, indent=2)

In [52]:
# Remove the 'documents_stem' index so the create call that follows starts
# from a clean slate.
delete_index(cli=client, index_name='documents_stem')

In [53]:
# Field mappings: `id` is an exact-match keyword, `name` is full text with a
# keyword sub-field, and `text` is analysed with the custom analyzer below.
stem_mappings = {
    "properties": {
        "id": {"type": "keyword"},
        "name": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
        },
        "text": {"type": "text", "analyzer": "medical_analyzer_german"},
    }
}

# Token filters referenced by the analyzer.
stem_filters = {
    "german_snowball": {"type": "snowball", "language": "German2"},
    "german_stop": {"type": "stop", "stopwords": "_german_"},
}

# Analyzer: split on whitespace, then lowercase -> stop words -> stemmer.
stem_analyzers = {
    "medical_analyzer_german": {
        "tokenizer": "whitespace",
        "filter": ["lowercase", "german_stop", "german_snowball"],
    },
}

stem_settings = {
    "analysis": {
        "analyzer": stem_analyzers,
        "filter": stem_filters,
    }
}

client.indices.create(
    index='documents_stem',
    mappings=stem_mappings,
    settings=stem_settings,
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'documents_stem'})

In [54]:
# `client.indices` is the client's own indices namespace (the create cell
# above already calls `client.indices.create`), so reuse it instead of
# building a second IndicesClient from a private module.
indices_client = client.indices

# Run a sample phrase through the custom analyzer to inspect the emitted
# tokens; keyword parameters mirror the style of the create call.
indices_client.analyze(
    index='documents_stem',
    analyzer='medical_analyzer_german',
    text='Jet-Lavage, Debridement und VAG Wechsel linken Hand',
).body

{'tokens': [{'token': 'jet-lavage,',
   'start_offset': 0,
   'end_offset': 11,
   'type': 'word',
   'position': 0},
  {'token': 'debridement',
   'start_offset': 12,
   'end_offset': 23,
   'type': 'word',
   'position': 1},
  {'token': 'vag',
   'start_offset': 28,
   'end_offset': 31,
   'type': 'word',
   'position': 3},
  {'token': 'wechsel',
   'start_offset': 32,
   'end_offset': 39,
   'type': 'word',
   'position': 4},
  {'token': 'link',
   'start_offset': 40,
   'end_offset': 46,
   'type': 'word',
   'position': 5},
  {'token': 'hand',
   'start_offset': 47,
   'end_offset': 51,
   'type': 'word',
   'position': 6}]}