In [1]:
from elasticsearch import Elasticsearch

In [2]:
# Connect to Elasticsearch
es = Elasticsearch(
    ["http://localhost:9200"],
    verify_certs=False,  
)

In [3]:
# Index name and document
index_name = "first_index"

# Specify transport options using Elasticsearch.options()
transport_options = {
    'ignore': 400
}

# Create the index with transport options
es.indices.create(index=index_name, params=transport_options)

  es.indices.create(index=index_name, params=transport_options)
  es.indices.create(index=index_name, params=transport_options)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'first_index'})

In [4]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [5]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [6]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [7]:

es.indices.exists(index=index_name)

HeadApiResponse(True)

In [8]:
es.indices.delete(index=index_name)

ObjectApiResponse({'acknowledged': True})

In [9]:

es.indices.exists(index=index_name)

HeadApiResponse(False)

In [10]:
# Insert and Get Query
doc1 = {"city":"Srinagar", "Country": "India"}
doc2 = {"city":"Moscow", "Country": "Russia"}
es.index(index="cities", id=1, body=doc1)

ObjectApiResponse({'_index': 'cities', '_id': '1', '_version': 6, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 5})

In [11]:
es.index(index="cities", id=2, body=doc2)


ObjectApiResponse({'_index': 'cities', '_id': '2', '_version': 4, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 5})

In [12]:
res = es.get(index="cities", id=1)

In [13]:
res

ObjectApiResponse({'_index': 'cities', '_id': '1', '_version': 6, '_seq_no': 8, '_primary_term': 5, 'found': True, '_source': {'city': 'Srinagar', 'Country': 'India'}})

In [14]:
res["_source"]

{'city': 'Srinagar', 'Country': 'India'}

In [15]:
# Different serach query for matching docs
doc3 = {"sentence":"Today is a SUNNY day"}
doc4 = {"sentence":"Today is a BRIGHT-SUNNY day"}

In [16]:
es.index(index="english", id=1, body=doc3)

ObjectApiResponse({'_index': 'english', '_id': '1', '_version': 4, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 5})

In [17]:
es.index(index="english", id=2, body=doc4)


ObjectApiResponse({'_index': 'english', '_id': '2', '_version': 4, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 10, '_primary_term': 5})

In [18]:
res1 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "match":{
            "sentence":"SUNNY"
        }
    }
})

res1

ObjectApiResponse({'took': 114, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

es.search: This method is used to perform a search operation on Elasticsearch. It takes parameters such as the index name, the search query body, and other optional parameters.

index="english": Specifies the index on which the search operation should be performed. In this case, it is the "english" index.

body: This is the query body that defines the search request. It's a JSON object that specifies the search criteria.

"from": 0: Specifies the starting index from where the search results should be retrieved. In this case, it is set to 0, indicating that the results should start from the first match.

"size": 0: Specifies the number of hits (search results) to be returned. Setting it to 0 indicates that only the aggregations and metadata should be returned, but no actual search hits.

"query": This is the key indicating the start of the query definition.

"match": This is a type of query that performs a full-text search on the specified field. In this case, it is searching for documents where the "sentence" field matches the term "SUNNY."

In [19]:

"""   
    res1 using a "match" query looks for documents containing individual tokens from the search term.
    res2 using a "match_phrase" query looks for documents where the entire search term appears as a phrase in the specified field.
"""
    
res2 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "match_phrase":{
            "sentence":"SUNNY"
        }
    }
})

res2


ObjectApiResponse({'took': 2, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [20]:
res3 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "match_phrase":{
            "sentence":"bright-SUNNY"
        }
    }
})

res3

ObjectApiResponse({'took': 26, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [21]:
"""
    The "term" query is used for exact matching of terms in Elasticsearch.
    It looks for documents where the exact term specified in the query matches the term in the specified field.
    Unlike the "match" and "match_phrase" queries, 
    the "term" query doesn't analyze the search term; it looks for the exact term in the inverted index.
"""
res4 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "term":{
            "sentence":"bright-SUNNY"
        }
    }
})

res4

ObjectApiResponse({'took': 1, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [22]:
# Combining queries ------> must, must_not, should

"""
"bool" Query:

    The "bool" query is a compound query that allows you to combine multiple queries using boolean logic.
    It supports various clauses, including:
        "must":
            Documents must match this query. It's similar to the logical AND operation.
        "must_not":
            Documents must not match this query. It's similar to the logical NOT operation.
        "should":
            This represents optional matching. 
            Documents that match this query will contribute to the final score but are not required to match.
"""
res5 = es.search(index="english", body={
    "from":0,
    "size":0,
    "query":{
        "bool":{
            "must_not":{
                "match":{
                    "sentence":"bright"
                }
            }, 
            "should":{
                "match":{
                    "sentence":"sunny"
                }
            }
        }
    }
})

res5


ObjectApiResponse({'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

In [23]:
# Combining queries ------> must, must_not, should
res6 = es.search(index="english", body={
    "from":0,
    "size":1,
    "query":{
        "bool":{
            "must_not":{
                "match":{
                    "sentence":"bright"
                }
            }, 
            "should":{
                "match":{
                    "sentence":"sunny"
                }
            }
        }
    }
})

res6


ObjectApiResponse({'took': 21, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 0.29667217, 'hits': [{'_index': 'english', '_id': '1', '_score': 0.29667217, '_source': {'sentence': 'Today is a SUNNY day'}}]}})

In [24]:
# Regular Expressions in ES

# Add another document
doc5 = {"sentence":"Today is a rainy day"}
es.index(index="english", id=3, body=doc5)



ObjectApiResponse({'_index': 'english', '_id': '3', '_version': 4, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 11, '_primary_term': 5})

In [25]:
es.get(index="english", id=1)
es.get(index="english", id=2)
es.get(index="english", id=3)

ObjectApiResponse({'_index': 'english', '_id': '3', '_version': 4, '_seq_no': 11, '_primary_term': 5, 'found': True, '_source': {'sentence': 'Today is a rainy day'}})

In [26]:
"""
    "regexp" Query:

    The "regexp" query allows you to search for documents based on a regular expression pattern.
    In this case, the regular expression pattern is ".*" which essentially matches any string (zero or more characters). 
    This means it will match any document where the "sentence" field contains at least one character.
"""

res7 = es.search(index="english", body={
    "from":0,
    "size":3,
    "query":{
        "regexp":{
            "sentence":".*"
        }
    }
})

res7

ObjectApiResponse({'took': 13, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'english', '_id': '3', '_score': 1.0, '_source': {'sentence': 'Today is a rainy day'}}, {'_index': 'english', '_id': '1', '_score': 1.0, '_source': {'sentence': 'Today is a SUNNY day'}}, {'_index': 'english', '_id': '2', '_score': 1.0, '_source': {'sentence': 'Today is a BRIGHT-SUNNY day'}}]}})

In [27]:
# Mapping in ElasticSearch 
"""
In Elasticsearch, mapping is the process of defining how documents and their fields should be stored and indexed.
It defines the data type of each field and other properties that control how the data is indexed and queried.
Mapping is crucial for Elasticsearch to understand the structure of your data and to perform efficient searches.
""" 
from datetime import datetime

doc6 = {"city":"Jammu","Country":"India", "datetime":datetime.now()}
doc7 = {"city":"Srinagar","Country":"India", "datetime":datetime.now()}
doc8 = {"city":"New Delhi","Country":"India", "datetime":datetime.now()}



In [28]:
es.index(index="travel",id=1, body=doc6)

ObjectApiResponse({'_index': 'travel', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 4})

In [29]:
es.index(index="travel",id=2, body=doc7)

ObjectApiResponse({'_index': 'travel', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 4})

In [30]:
es.index(index="travel",id=3, body=doc8)


ObjectApiResponse({'_index': 'travel', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 4})

In [31]:
es.indices.get_mapping(index="travel")

ObjectApiResponse({'travel': {'mappings': {'properties': {'Country': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'city': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'datetime': {'type': 'date'}}}}})

In [32]:
es.indices.delete(index="travel")

ObjectApiResponse({'acknowledged': True})

In [33]:
es.indices.create(index="travel")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'travel'})

In [34]:
# Date Histogram Aggregation in Elasticsearch
"""
    In Elasticsearch, the Date Histogram Aggregation is a powerful feature 
    that allows you to aggregate data based on date intervals.
    It's particularly useful when you want to analyze data over time,
    such as viewing the number of documents created or events occurring within specific time intervals.
"""



"\n    In Elasticsearch, the Date Histogram Aggregation is a powerful feature \n    that allows you to aggregate data based on date intervals.\n    It's particularly useful when you want to analyze data over time,\n    such as viewing the number of documents created or events occurring within specific time intervals.\n"

In [35]:
# Bulk Insert and Scan
"""Bulk operations in Elasticsearch are efficient ways to index multiple documents in a single request
    or to perform multiple actions in a single API call.
    These operations are often used for bulk indexing or updating of documents.
    
    If you want to perform a bulk scan or retrieve multiple documents in a single request, you can use the mget API. 
"""

'Bulk operations in Elasticsearch are efficient ways to index multiple documents in a single request\n    or to perform multiple actions in a single API call.\n    These operations are often used for bulk indexing or updating of documents.\n    \n    If you want to perform a bulk scan or retrieve multiple documents in a single request, you can use the mget API. \n'

## Elasticsearch Fundamental Concepts

In [37]:
print(es.ping())

# Display all indices
indices=es.indices.get_alias()
for index in indices:
    print(index)

True
.kibana_security_solution_8.12.0_001
.slo-observability.sli-v3
.internal.alerts-ml.anomaly-detection.alerts-default-000001
.internal.alerts-observability.slo.alerts-default-000001
.internal.alerts-observability.apm.alerts-default-000001
.kibana_analytics_8.12.0_001
.internal.alerts-observability.metrics.alerts-default-000001
.apm-custom-link
text_index_using_api
english
.internal.alerts-security.alerts-default-000001
test_index_using_api
welcome
.internal.alerts-stack.alerts-default-000001
.kibana_ingest_8.12.0_001
.internal.alerts-observability.logs.alerts-default-000001
cities
.internal.alerts-observability.uptime.alerts-default-000001
second_index
.kibana_8.12.0_001
.apm-agent-configuration
.apm-source-map
.kibana_alerting_cases_8.12.0_001
owais
.kibana-observability-ai-assistant-conversations-000001
.slo-observability.summary-v3
.internal.alerts-observability.threshold.alerts-default-000001
.kibana-observability-ai-assistant-kb-000001
.kibana_task_manager_8.12.0_001
travel
.sl

  indices=es.indices.get_alias()


In [38]:
# Create index with sequence

index_basename = "hello"
for i in range(1,6):
    response = es.indices.create(index=index_basename+"_"+str(i))
    print(response)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'hello_1'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'hello_2'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'hello_3'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'hello_4'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'hello_5'}


In [40]:
import io

# Index creation in bulk using input file
with io.open('input.txt', 'r', encoding='utf-8') as f1:
    data=f1.read()
    f1.close()
    
# print(data)
data= data.split('\n')
# print(data)

for index in data:
    response = es.indices.create(index=index)
    print(response)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'employee'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'hr'}


{'acknowledged': True, 'shards_acknowledged': True, 'index': 'finance'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'vendor'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'sales'}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'invoice'}


In [42]:
# Search number of indices 

index_name='hello_*'
try:
    response=es.search(index=index_name)
    print(response["_shards"]["total"])
except Exception as e:
    print(str(e))

5


In [43]:
#search and display the index names based on the given search pattern and delete the indices

index_pattern="hello_*"
response=es.indices.get_alias(index=index_pattern)
if(len(response)>0):
    for index in response:
        delete_response=es.indices.delete(index=index)
        print(delete_response)
else:
    print("no index has been found for the given search pattern")


{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}


In [44]:
# delete index files given input directory as BULK
with io.open('input.txt','r', encoding='utf-8') as f1:
    data=f1.read()
    f1.close()
    
# print(data)
data=data.split("\n")
# print(data)

for index in data:
    try:
        response=es.indices.delete(index=index)
        print(response)
    except Exception as e:
        print(str(e))   

{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}


In [45]:
# Search all indices

import requests

response=requests.get("http://localhost:9200/_cat/indices?format=json&pretty")
# print(response.json())
data=(response.json())
[print(row["index"]) for row in data]

owais
cities
second_index
text_index_using_api
english
test_index_using_api
travel
welcome


[None, None, None, None, None, None, None, None]

In [46]:
# create index using elastic search api

import requests

response=requests.put("http://localhost:9200/text_index_using_api")
print(response.text)


{"error":{"root_cause":[{"type":"resource_already_exists_exception","reason":"index [text_index_using_api/-S9EwOhjS9CssGMNg2NGLg] already exists","index_uuid":"-S9EwOhjS9CssGMNg2NGLg","index":"text_index_using_api"}],"type":"resource_already_exists_exception","reason":"index [text_index_using_api/-S9EwOhjS9CssGMNg2NGLg] already exists","index_uuid":"-S9EwOhjS9CssGMNg2NGLg","index":"text_index_using_api"},"status":400}


In [47]:
# Search specific index

import requests

index_name="owais"
response=requests.get("http://localhost:9200/"+index_name)
# print(response.text)
print(response.json())
print(response.json().keys())

{'owais': {'aliases': {}, 'mappings': {}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'owais', 'creation_date': '1706715674564', 'number_of_replicas': '1', 'uuid': '_GKWi5O4TXeHtG3dYHnvIA', 'version': {'created': '8500008'}}}}}
dict_keys(['owais'])


In [48]:
# Add Document To Elasticsearch Index Using Rest API
import requests

url="http://localhost:9200/"
headers={
    "Content-Type":"application/json",
}
json_data={
    "message":"this is added using python and rest api and id 1",
     "count":10
}
response=requests.put(url+"test_index_using_api/_doc/1?pretty"
                      ,headers=headers,json=json_data)
print(response)

<Response [200]>


In [49]:
# CSV to elasticsearch

import requests
import io


with io.open("weblog.csv","r",encoding="utf-8") as f1:
    data=f1.read()
    f1.close()
lines=data.split("\n")[1:]
i=1
for line in lines:
    ip=line.split(",")[0]
    time=line.split(",")[1]
    url=line.split(",")[2]
    status=line.split(",")[3]
    url="http://localhost:9200/"
    headers={
    "Content-Type":"application/json",
    }
    json_data={
        "ip_address":ip,
        "timesmtap":time,
        "url":url,
        "status":status
    }
    response=requests.put(url+"test_index_using_api/_doc/"+str(i)+"?pretty",
                          headers=headers,
                          json=json_data)
    print(f"doc #{i} is added")
    i=i+1

doc #1 is added
doc #2 is added
doc #3 is added
doc #4 is added
doc #5 is added
doc #6 is added
doc #7 is added
doc #8 is added
doc #9 is added
doc #10 is added
doc #11 is added
doc #12 is added
doc #13 is added
doc #14 is added
doc #15 is added
doc #16 is added
doc #17 is added
doc #18 is added
doc #19 is added
doc #20 is added
doc #21 is added
doc #22 is added
doc #23 is added
doc #24 is added
doc #25 is added
doc #26 is added
doc #27 is added
doc #28 is added
doc #29 is added
doc #30 is added
doc #31 is added
doc #32 is added
doc #33 is added
doc #34 is added
doc #35 is added
doc #36 is added
doc #37 is added
doc #38 is added
doc #39 is added
doc #40 is added
doc #41 is added
doc #42 is added
doc #43 is added
doc #44 is added
doc #45 is added
doc #46 is added
doc #47 is added
doc #48 is added
doc #49 is added
doc #50 is added
doc #51 is added
doc #52 is added
doc #53 is added
doc #54 is added
doc #55 is added
doc #56 is added
doc #57 is added
doc #58 is added
doc #59 is added
doc #6

In [52]:
response=requests.get("http://localhost:9200/test_index_using_api/_doc/1000?_source=false&pretty")
found=response.json()["found"]
if found:
    print("doc id is present")
else:
    print("doc id is not present")

doc id is present
