In [1]:
import json
from itertools import islice

from elasticsearch import Elasticsearch, helpers
from elasticsearch._sync.client import IndicesClient
from elasticsearch_dsl import Search

In [2]:
def delete_index(cli, index_name):
    """Delete the index ``index_name`` if it exists.

    A missing index is not treated as an error, so the notebook can be run
    against a fresh cluster (or re-run) without this call raising.

    Args:
        cli: Elasticsearch client exposing the ``indices`` namespace.
        index_name: Name of the index to remove.

    Returns:
        None.
    """
    # An index does not have to be closed before it is deleted, so the delete
    # is issued directly; ignore_unavailable=True asks Elasticsearch not to
    # fail when the target index is missing.
    cli.indices.delete(index=index_name, ignore_unavailable=True)

In [13]:
# Client for the local Elasticsearch node, with a 120 s per-request timeout.
client = Elasticsearch(
    hosts='http://localhost:9200',
    request_timeout=120,
)

# Search handle over the 'documents_stem' index, bound to that client.
search = Search(index='documents_stem').using(client)

In [6]:
# Toggle: set to True to also write the snapshot to 'document_dump.json'.
dump_to_disk = False

# Scan the whole index behind `search` and collect every hit as a plain dict,
# keyed by the hit's `id` attribute.
document_dump = {}
for hit in search.scan():
    document_dump.update({hit.id: hit.to_dict()})

if dump_to_disk:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    with open('document_dump.json', mode='w', encoding='utf-8') as dump_file:
        json.dump(document_dump, dump_file, indent=2, ensure_ascii=False)

In [19]:
# Remove the 'documents_test' index so the following cell can create it anew.
delete_index(cli=client, index_name='documents_test')

In [20]:
# Field mappings: `id` is an exact-match keyword, `name` is full text with a
# keyword sub-field, and `text` is analysed with the custom analyzer below.
test_index_mappings = {
    "properties": {
        "id": {"type": "keyword"},
        "name": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
        },
        "text": {"type": "text", "analyzer": "medical_analyzer_german"},
    }
}

# Custom token filters referenced by the analyzer.
test_index_filters = {
    "german_snowball": {"type": "snowball", "language": "German2"},
    "german_stop": {"type": "stop", "stopwords": "_german_"},
}

# Analyzer: split on whitespace, then lowercase -> stop words -> snowball stemming.
test_index_settings = {
    "analysis": {
        "analyzer": {
            "medical_analyzer_german": {
                "tokenizer": "whitespace",
                "filter": ["lowercase", "german_stop", "german_snowball"],
            },
        },
        "filter": test_index_filters,
    }
}

client.indices.create(
    index='documents_test',
    mappings=test_index_mappings,
    settings=test_index_settings,
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'documents_test'})

In [None]:
# Run the custom analyzer on a sample sentence to inspect the tokens it emits.
# `client.indices` is the client's own indices namespace, so no IndicesClient
# has to be constructed from the private `elasticsearch._sync` module.
# NOTE(review): the analyzer is resolved against 'documents_stem', whereas the
# index created in this notebook with 'medical_analyzer_german' is
# 'documents_test' — confirm which index is intended here.
indices_client = client.indices
indices_client.analyze(
    index='documents_stem',
    analyzer="medical_analyzer_german",
    text="Jet-Lavage, Debridement und VAG Wechsel linken Hand",
).body

In [23]:
# Switch: True replays the JSON snapshot on disk, False scans the index live.
from_file = True


def _to_action(doc_id, source):
    """Wrap one document as a bulk action targeting 'documents_test'."""
    return {
        "_index": "documents_test",
        "_id": doc_id,
        "_source": source
    }


if not from_file:
    # Lazy: hits are pulled from the scan only as the actions are consumed.
    index_actions = (_to_action(hit.id, hit.to_dict()) for hit in search.scan())
else:
    with open('document_dump.json', mode='r', encoding='utf-8') as dump_file:
        json_data = json.load(dump_file)
    # The snapshot is fully loaded at this point, so the generator can be
    # built after the file has been closed.
    index_actions = (_to_action(doc.get("id"), doc) for doc in json_data.values())

In [24]:
# Index only the first 8 actions as a quick sample. islice stops pulling from
# `index_actions` after 8 items instead of materialising every action first.
# Note: islice consumes what it reads from `index_actions`, so rebuild
# `index_actions` before running this cell again.
helpers.bulk(client, islice(index_actions, 8))

(8, [])

In [18]:
# Full-text match on the `text` field of 'documents_stem'. The query is passed
# as a keyword parameter, matching the named-parameter style used for
# `client.indices.create` in this notebook.
# NOTE(review): the recorded run of this cell ended in
# ApiError(503, 'search_phase_execution_exception') — presumably the index's
# shards were unavailable at the time; verify the index state before re-running.
client.search(
    index='documents_stem',
    query={
        "match": {
            "text": {
                "query": "stationär",
            }
        }
    },
)

ApiError: ApiError(503, 'search_phase_execution_exception', None)