In [64]:
from elasticsearch import Elasticsearch

In [65]:
# Connect to Elasticsearch
es = Elasticsearch(
    ["http://localhost:9200"],
    verify_certs=False,  
)

In [66]:
# Index name and document
index_name = "first_index"

# Specify transport options using Elasticsearch.options()
transport_options = {
    'ignore': 400
}

# Create the index with transport options
es.indices.create(index=index_name, params=transport_options)

  es.indices.create(index=index_name, params=transport_options)
  es.indices.create(index=index_name, params=transport_options)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'first_index'})

In [67]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [68]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [69]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [70]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [71]:
es.indices.delete(index=index_name)

ObjectApiResponse({'acknowledged': True})

In [72]:

es.indices.exists(index=index_name)

HeadApiResponse(False)

In [73]:
# Insert and Get Query
doc1 = {"city":"Srinagar", "Country": "India"}
doc2 = {"city":"Moscow", "Country": "Russia"}
es.index(index="cities", id=1, body=doc1)

ObjectApiResponse({'_index': 'cities', '_id': '1', '_version': 5, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 2})

In [74]:
es.index(index="cities", id=2, body=doc2)


ObjectApiResponse({'_index': 'cities', '_id': '2', '_version': 3, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 2})

In [75]:
res = es.get(index="cities", id=1)

In [76]:
res

ObjectApiResponse({'_index': 'cities', '_id': '1', '_version': 5, '_seq_no': 6, '_primary_term': 2, 'found': True, '_source': {'city': 'Srinagar', 'Country': 'India'}})

In [77]:
res["_source"]

{'city': 'Srinagar', 'Country': 'India'}

In [78]:
# Different serach query for matching docs
doc3 = {"sentence":"Today is a SUNNY day"}
doc4 = {"sentence":"Today is a BRIGHT-SUNNY day"}

In [79]:
es.index(index="english", id=1, body=doc3)

ObjectApiResponse({'_index': 'english', '_id': '1', '_version': 3, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 2})

In [80]:
es.index(index="english", id=2, body=doc4)


ObjectApiResponse({'_index': 'english', '_id': '2', '_version': 3, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 2})

In [81]:
res1 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "match":{
            "sentence":"SUNNY"
        }
    }
})

res1

ObjectApiResponse({'took': 8, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

es.search: This method is used to perform a search operation on Elasticsearch. It takes parameters such as the index name, the search query body, and other optional parameters.

index="english": Specifies the index on which the search operation should be performed. In this case, it is the "english" index.

body: This is the query body that defines the search request. It's a JSON object that specifies the search criteria.

"from": 0: Specifies the starting index from where the search results should be retrieved. In this case, it is set to 0, indicating that the results should start from the first match.

"size": 0: Specifies the number of hits (search results) to be returned. Setting it to 0 indicates that only the aggregations and metadata should be returned, but no actual search hits.

"query": This is the key indicating the start of the query definition.

"match": This is a type of query that performs a full-text search on the specified field. In this case, it is searching for documents where the "sentence" field matches the term "SUNNY."

In [82]:

"""   
    res1 using a "match" query looks for documents containing individual tokens from the search term.
    res2 using a "match_phrase" query looks for documents where the entire search term appears as a phrase in the specified field.
"""
    
res2 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "match_phrase":{
            "sentence":"SUNNY"
        }
    }
})

res2


ObjectApiResponse({'took': 7, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [83]:
res3 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "match_phrase":{
            "sentence":"bright-SUNNY"
        }
    }
})

res3

ObjectApiResponse({'took': 6, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [84]:
"""
    The "term" query is used for exact matching of terms in Elasticsearch.
    It looks for documents where the exact term specified in the query matches the term in the specified field.
    Unlike the "match" and "match_phrase" queries, 
    the "term" query doesn't analyze the search term; it looks for the exact term in the inverted index.
"""
res4 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "term":{
            "sentence":"bright-SUNNY"
        }
    }
})

res4

ObjectApiResponse({'took': 6, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [85]:
# Combining queries ------> must, must_not, should

"""
"bool" Query:

    The "bool" query is a compound query that allows you to combine multiple queries using boolean logic.
    It supports various clauses, including:
        "must":
            Documents must match this query. It's similar to the logical AND operation.
        "must_not":
            Documents must not match this query. It's similar to the logical NOT operation.
        "should":
            This represents optional matching. 
            Documents that match this query will contribute to the final score but are not required to match.
"""
res5 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "bool":{
            "must_not":{
                "match":{
                    "sentence":"bright"
                }
            }, 
            "should":{
                "match":{
                    "sentence":"sunny"
                }
            }
        }
    }
})

res5


ObjectApiResponse({'took': 10, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [86]:
# Combining queries ------> must, must_not, should
res6 = es.search(index="english", body={
    "from":0,
    "size":1,
    "query":{
        "bool":{
            "must_not":{
                "match":{
                    "sentence":"bright"
                }
            }, 
            "should":{
                "match":{
                    "sentence":"sunny"
                }
            }
        }
    }
})

res6


ObjectApiResponse({'took': 16, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 0.4823361, 'hits': [{'_index': 'english', '_id': '1', '_score': 0.4823361, '_source': {'sentence': 'Today is a SUNNY day'}}]}})

In [87]:
# Regular Expressions in ES

# Add another document
doc5 = {"sentence":"Today is a rainy day"}
es.index(index="english", id=3, body=doc5)



ObjectApiResponse({'_index': 'english', '_id': '3', '_version': 3, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 2})

In [88]:
es.get(index="english", id=1)
es.get(index="english", id=2)
es.get(index="english", id=3)

ObjectApiResponse({'_index': 'english', '_id': '3', '_version': 3, '_seq_no': 8, '_primary_term': 2, 'found': True, '_source': {'sentence': 'Today is a rainy day'}})

In [89]:
"""
    "regexp" Query:

    The "regexp" query allows you to search for documents based on a regular expression pattern.
    In this case, the regular expression pattern is ".*" which essentially matches any string (zero or more characters). 
    This means it will match any document where the "sentence" field contains at least one character.
"""

res7 = es.search(index="english", body={
    "from":0,
    "size":3,
    "query":{
        "regexp":{
            "sentence":".*"
        }
    }
})

res7

ObjectApiResponse({'took': 14, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'english', '_id': '2', '_score': 1.0, '_source': {'sentence': 'Today is a BRIGHT-SUNNY day'}}, {'_index': 'english', '_id': '3', '_score': 1.0, '_source': {'sentence': 'Today is a rainy day'}}, {'_index': 'english', '_id': '1', '_score': 1.0, '_source': {'sentence': 'Today is a SUNNY day'}}]}})

In [90]:
# Mapping in ElasticSearch 
"""
In Elasticsearch, mapping is the process of defining how documents and their fields should be stored and indexed.
It defines the data type of each field and other properties that control how the data is indexed and queried.
Mapping is crucial for Elasticsearch to understand the structure of your data and to perform efficient searches.
""" 
from datetime import datetime

doc6 = {"city":"Jammu","Country":"India", "datetime":datetime.now()}
doc7 = {"city":"Srinagar","Country":"India", "datetime":datetime.now()}
doc8 = {"city":"New Delhi","Country":"India", "datetime":datetime.now()}



In [91]:
es.index(index="travel",id=1, body=doc6)

ObjectApiResponse({'_index': 'travel', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [92]:
es.index(index="travel",id=2, body=doc7)

ObjectApiResponse({'_index': 'travel', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [93]:
es.index(index="travel",id=3, body=doc8)


ObjectApiResponse({'_index': 'travel', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1})

In [94]:
es.indices.get_mapping(index="travel")

ObjectApiResponse({'travel': {'mappings': {'properties': {'Country': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'city': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'datetime': {'type': 'date'}}}}})

In [95]:
es.indices.delete(index="travel")

ObjectApiResponse({'acknowledged': True})

In [96]:
es.indices.create(index="travel")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'travel'})

In [97]:
# Date Histogram Aggregation in Elasticsearch
"""
    In Elasticsearch, the Date Histogram Aggregation is a powerful feature 
    that allows you to aggregate data based on date intervals.
    It's particularly useful when you want to analyze data over time,
    such as viewing the number of documents created or events occurring within specific time intervals.
"""
# es.search(index="travel", body={
#     "from": 0,
#     "size": 0,
#     "query": {
#         "match_all": {}
#     ,
#     "aggs": {
#         "date_histogram": {
#             "field": "datetime",
#             "interval": "year"}
#         }
#     }
# })



"\n    In Elasticsearch, the Date Histogram Aggregation is a powerful feature \n    that allows you to aggregate data based on date intervals.\n    It's particularly useful when you want to analyze data over time,\n    such as viewing the number of documents created or events occurring within specific time intervals.\n"

In [99]:
# Bulk Insert and Scan
"""Bulk operations in Elasticsearch are efficient ways to index multiple documents in a single request
    or to perform multiple actions in a single API call.
    These operations are often used for bulk indexing or updating of documents.
    
    If you want to perform a bulk scan or retrieve multiple documents in a single request, you can use the mget API. 
"""

actions = [
    {
    "_index": "Chapter",
    "_type":"doc",
    "_id":j,
    "_source":{
        "any":"data"+str(j),"timestamp":datetime.now()
    }
    }
    for j in range(0,100)
    ]