In [None]:
import elasticsearch
import glob
import json


# --- Configuration ---------------------------------------------------------
# Glob pattern selecting the letter JSON files that are fed into the index.
data_files = r"../../../ltrs/letters/*/json/*.json"
# Name of the index that is created and populated below.
index = "letter_index"


# --- Connectivity check ----------------------------------------------------
# Create a client with the library defaults and stop right away when the
# cluster does not answer a ping.
elastic = elasticsearch.Elasticsearch()
if not elastic.ping():
    raise Exception("Elasticsearch is not running")
print("INFO: Elasticsearch is running")

# Read the index settings file. Its content is handed to Elasticsearch as a
# plain string, i.e. it is not parsed or validated on the client side.
with open("index_settings.json", mode="r", encoding="utf-8") as index_settings:
    settings = index_settings.read()

# (Re)create the index: an already existing index with the same name is
# deleted first, so the index is empty after this step.
# All client calls use keyword arguments; passing API parameters positionally
# is deprecated in recent elasticsearch-py 7.x releases.
if elastic.indices.exists(index=index):
    print("INFO: Deleting previous index {0}".format(index))
    elastic.indices.delete(index=index)
elastic.indices.create(index=index, body=settings)
print("INFO: Index {0} created sucessfully".format(index))

# Collect every letter file matching the configured glob pattern.
letter_files = glob.glob(data_files)
print("Found {0} letters in {1}".format(len(letter_files), data_files))

# Index the letters one request per file and keep count of the outcomes.
n_success = 0
n_fail = 0
for letter_file in letter_files:
    with open(letter_file, mode="r", encoding="utf-8") as data_file:
        content = data_file.read()

    # Send the raw JSON text to elasticsearch for indexing. No document id is
    # supplied, so anything other than the result 'created' counts as failure.
    res_json = elastic.index(index=index, doc_type="_doc", body=content)
    result = res_json.get("result")
    if result != 'created':
        # Name the letter that failed so it can be inspected afterwards.
        print("Indexing failed with results {0} for {1}".format(result, letter_file))
        n_fail += 1
        continue

    n_success += 1
    # Report progress only right after a success; otherwise the same message
    # would repeat for every failure while the counter sits on a multiple of
    # 500 (including 0 before the first success).
    if n_success % 500 == 0:
        print("Indexing reached {0} documents".format(n_success))

print("Indexing completed. Successfully indexed: {0}. Failed: {1}".format(n_success, n_fail))






In [None]:
import elasticsearch
import glob
import json

# Bulk variant of the indexing step: build one payload in which every letter
# contributes an action line followed by its document line.
# Relies on `letter_files`, `index` and `elastic` from the first cell.
# NOTE(review): neither cell supplies document ids, so running this cell after
# the per-document loop presumably stores every letter a second time -- confirm
# whether the two cells are meant as alternatives.
bulk_commands = []
for letter_file in letter_files:
    with open(letter_file, mode="r", encoding="utf-8") as data_file:
        # Parse and re-serialise the document: json.dumps without indent emits
        # no line breaks, which keeps each bulk entry on a single line.
        content = json.load(data_file)
    bulk_commands.append(json.dumps({"index": {"_index": index}}))
    bulk_commands.append(json.dumps(content))

if bulk_commands:
    # Send to elasticsearch for indexing (everything in a single request).
    res_json = elastic.bulk(body=bulk_commands)

    # Count the outcome per document from the response instead of assuming
    # that every submitted document was stored; failed items carry an "error"
    # entry under their action key.
    items = res_json.get("items", [])
    n_failed = sum(1 for item in items if "error" in item.get("index", {}))
    print("Indexing completed. Successfully indexed: {}, errors: {}".format(len(items) - n_failed, res_json.get("errors", "unknown")))
else:
    # An empty bulk body is not a valid request, so skip the call entirely.
    print("No letters found, nothing to index")