# I. Managing indices and mapppings

In [3]:
# 1. initialize the client
from elasticsearch import Elasticsearch

HOST = "https://localhost:9200"
ELASTIC_USER = "elastic"
# The password for the 'elastic' user generated by Elasticsearch
ELASTIC_PASSWORD = "J6PR2GgDItYikH71LI6O"
# The path of ca certificates 
CA_CERTS = "C:/elasticsearch-8.6.2/config/certs/http_ca.crt"

# Create the client instance
es = Elasticsearch(
    hosts = HOST,
    ca_certs = CA_CERTS,
    http_auth = (ELASTIC_USER, ELASTIC_PASSWORD),
)
es.info()

ConnectionError: ConnectionError(<urllib3.connection.HTTPSConnection object at 0x000002289FD14250>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it) caused by: NewConnectionError(<urllib3.connection.HTTPSConnection object at 0x000002289FD14250>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it)

In [78]:
# 2. create an index
index_name = "my_index"
type_name = "_doc"

if es.indices.exists(index_name):
    es.indices.delete(index_name)

es.indices.create(index_name)
es.cluster.health(wait_for_status="yellow")

{'cluster_name': 'elasticsearch',
 'status': 'yellow',
 'timed_out': False,
 'number_of_nodes': 1,
 'number_of_data_nodes': 1,
 'active_primary_shards': 13,
 'active_shards': 13,
 'relocating_shards': 0,
 'initializing_shards': 0,
 'unassigned_shards': 2,
 'delayed_unassigned_shards': 0,
 'number_of_pending_tasks': 0,
 'number_of_in_flight_fetch': 0,
 'task_max_waiting_in_queue_millis': 0,
 'active_shards_percent_as_number': 86.66666666666667}

In [79]:
# 3. mapping
es.indices.put_mapping(
    index = index_name, 
    body = {
        "properties": {
            "uuid": {"type": "keyword"},
            "title": {
                "type": "text", 
                "term_vector": "with_positions_offsets"
            },
            "parsedtext": {
                "type": "text", 
                "term_vector": "with_positions_offsets"
            },
            "nested": {
                "type": "nested",
                "properties": {
                    "num": {"type": "integer"},
                    "name": {"type": "keyword"},
                    "value": {"type": "keyword"}
                }
            },
            "date": {"type": "date"},
            "position": {"type": "integer"},
            "name": {
                "type": "text", 
                "term_vector": "with_positions_offsets"
            }
        }
    }
)

{'acknowledged': True}

In [80]:
# 4. retrieve the mapping
import json

mappings = es.indices.get_mapping(index_name)
# prettyprint a JSON
print(json.dumps(mappings, indent=4, sort_keys=True))
#print(mappings)

{
    "my_index": {
        "mappings": {
            "properties": {
                "date": {
                    "type": "date"
                },
                "name": {
                    "term_vector": "with_positions_offsets",
                    "type": "text"
                },
                "nested": {
                    "properties": {
                        "name": {
                            "type": "keyword"
                        },
                        "num": {
                            "type": "integer"
                        },
                        "value": {
                            "type": "keyword"
                        }
                    },
                    "type": "nested"
                },
                "parsedtext": {
                    "term_vector": "with_positions_offsets",
                    "type": "text"
                },
                "position": {
                    "type": "integer"
                },
            

In [81]:
# check if the index is exists
es.indices.exists(index=index_name)

True

In [82]:
# delete the index
es.indices.delete(index=index_name)

{'acknowledged': True}

# II. Managing documents

In [83]:
index_name = "my_index"
type_name = "_doc"

In [84]:
# 1. index some documents
from datetime import datetime

es.index(
    index=index_name, 
    doc_type = type_name, 
    id=1, 
    body={
        "name": "Joe Tester", 
        "parsedtext": 
        "Joe Testere nice guy", 
        "uuid": "11111", 
        "position": 1, 
        "date": datetime(2018, 12, 8), 
        "join_field": {"name": "book"}
    }
)

es.index(
    index=index_name, 
    doc_type = type_name, 
    id="1.1", 
    body={
        "name": "data1", 
        "value": "value1", 
        "join_field": {"name": "metadata", "parent": "1"}}, 
    routing=1
)

es.index(
    index=index_name, 
    doc_type = type_name, 
    id=2, 
    body={
        "name": "Bill Baloney",  
        "parsedtext": "Bill Testere nice guy",
        "uuid": "22222", "position": 2, 
        "date": datetime(2018, 12, 8), 
        "join_field": {"name": "book"}
    }
)

es.index(
    index=index_name, 
    doc_type = type_name, 
    id="2.1", 
    body={
        "name": "data2", 
        "value": "value2", 
        "join_field": {"name": "metadata", "parent": "2"}
    }, 
    routing=2
)

es.index(
    index=index_name,
    doc_type = type_name, 
    id=3, 
    body={
        "name": "Bill Clinton", 
        "parsedtext": """Bill is not nice guy""", 
        "uuid": "33333", 
        "position": 3, "date": datetime(2018, 12, 8), 
        "join_field": {"name": "book"}
    }
)

{'_index': 'my_index',
 '_id': '3',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 4,
 '_primary_term': 1}

In [86]:
# 2. update a document
es.update(
    index=index_name, 
    #doc_type = type_name, 
    id=2, 
    body={"script": 'ctx._source.position += 1'}
)

document=es.get(index=index_name, id=2)
#print(document)
print(json.dumps(document, indent=4, sort_keys=True))

{
    "_id": "2",
    "_index": "my_index",
    "_primary_term": 1,
    "_seq_no": 5,
    "_source": {
        "date": "2018-12-08T00:00:00",
        "join_field": {
            "name": "book"
        },
        "name": "Bill Baloney",
        "parsedtext": "Bill Testere nice guy",
        "position": 3,
        "uuid": "22222"
    },
    "_version": 2,
    "found": true
}


In [87]:
# 3. delete a document
es.delete(
    index=index_name, 
    doc_type = type_name, 
    id=3)

{'_index': 'my_index',
 '_id': '3',
 '_version': 2,
 'result': 'deleted',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 6,
 '_primary_term': 1}

In [90]:
# 5. bulk insert some documents
from elasticsearch.helpers import bulk
bulk(es, [
    {
        "_index":index_name, 
        "_id":"1", 
        "source":{
            "name": "Joe Tester", 
            "parsedtext": 
            "Joe Testere nice guy", 
            "uuid": "11111", 
            "position": 1,
            "date": datetime(2018, 12, 8)}
    },

    {
        "_index": index_name, 
        "_id": "1",
        "source": {
            "name": "Bill Baloney", 
            "parsedtext": "Bill Testere nice guy", 
            "uuid": "22222", 
            "position": 2,
            "date": datetime(2018, 12, 8)
        }
    }
])

(2, [])

# III. Executing a standard search

In [91]:
# 1. execute a search with a match_all query and print the results
from pprint import pprint

results = es.search(
    index = index_name,  
    doc_type = type_name, 
    body = {
        "query": {
            "match_all": {}
        }
    }
)
pprint(results)

{'_id': '_search',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 9,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 1,
 'result': 'created'}


In [92]:
# 2. execute a search with a term query and print the results
results = es.search(
    index = index_name, 
    doc_type = type_name, 
    body = {
        "query": {
            "term": {
                "name": {
                    "boost": 3.0, 
                    "value": "joe"
                }
            }
        }
    }
)
pprint(results)

{'_id': '_search',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 10,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


In [93]:
# 3. execute a search with a bool filter query and print the results
results = es.search(
    index = index_name, 
    doc_type = type_name, 
    body = {
        "query": {
            "bool": {
                "filter": {
                    "bool": {
                        "should": [
                            {"term": {"position": 1}},
                            {"term": {"position": 2}}
                        ]
                    }
                }
            }
        }
    }
)
pprint(results)

{'_id': '_search',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 11,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 3,
 'result': 'updated'}


# IV. Executing a search with aggregations

In [94]:
# 1. execute a search with a terms aggregation
results = es.search(
    index = index_name, 
    doc_type = type_name, 
    body = {
        "size":0,
        "aggs": {
            "pterms": {
                "terms": {
                    "field": "name", 
                    "size": 3
                }
            }
        }
    }
)
pprint(results)

{'_id': '_search',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 12,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 4,
 'result': 'updated'}


In [95]:
# 2. execute a search with a date histogram aggregation
results = es.search(
    index = index_name, 
    doc_type = type_name, 
    body = {
        "size":0,
        "aggs": {
            "date_histo": {
                "date_histogram": {
                    "field": "date", 
                    "interval": "month"
                }
            }
        }
    }
)
pprint(results)


{'_id': '_search',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 13,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 5,
 'result': 'updated'}


In [96]:
es.indices.delete(index=index_name)

{'acknowledged': True}