In [3]:
import os

from elasticsearch import Elasticsearch

In [16]:
# Credentials are read from the environment so that no secret is stored in the notebook.
# ES_USER falls back to "elastic"; ES_PASSWORD deliberately has no default.
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD")
if ES_PASSWORD is None:
    raise RuntimeError("Set the ES_PASSWORD environment variable before connecting.")

# Client for the Elasticsearch node at http://localhost:9200, using HTTP basic auth.
es = Elasticsearch(
    [{"host": "localhost", "port": 9200, "scheme": "http"}],
    basic_auth=(ES_USER, ES_PASSWORD),
)

In [17]:
# Display the client object to confirm which host it is configured for.
es

<Elasticsearch(['http://localhost:9200'])>

Creating the First Index

In [18]:
# Create the index "m_index" and keep the server's response for inspection.
res = es.indices.create(index="m_index")

In [19]:
# Show the response returned by the index-creation call.
res

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'm_index'})

In [21]:
# Ask the cluster whether the index "m_index" exists.
es.indices.exists(index="m_index")

HeadApiResponse(True)

Deleting Index

In [22]:
# Create a throwaway index "demo_indx" that is deleted again in a later cell.
demo_index = es.indices.create(index="demo_indx")

In [23]:
# Show the response returned when "demo_indx" was created.
demo_index

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'demo_indx'})

In [24]:
# Delete the index "demo_indx".
es.indices.delete(index="demo_indx")

ObjectApiResponse({'acknowledged': True})

In [25]:
# Confirm that "demo_indx" no longer exists after the delete.
es.indices.exists(index="demo_indx")

HeadApiResponse(False)

Inserting and Getting Data

In [26]:
# Three sample person records (name plus an occupation abbreviation).
Data_1 = dict(Name="Arjun", occupation="DA")
Data_2 = dict(Name="Gokul", occupation="DS")
Data_3 = dict(Name="Kiran", occupation="DE")

In [29]:
# Store Data_1 in the "person" index under document id 1.
# The payload is passed as `document=`, the 8.x client's parameter name (`body=` is the legacy spelling).
es.index(index="person", id=1, document=Data_1)

ObjectApiResponse({'_index': 'person', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [30]:
# Store Data_2 in the "person" index under document id 2.
# The payload is passed as `document=`, the 8.x client's parameter name (`body=` is the legacy spelling).
es.index(index="person", id=2, document=Data_2)

ObjectApiResponse({'_index': 'person', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

In [31]:
# Store Data_3 in the "person" index under document id 3.
# The payload is passed as `document=`, the 8.x client's parameter name (`body=` is the legacy spelling).
es.index(index="person", id=3, document=Data_3)

ObjectApiResponse({'_index': 'person', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1})

In [32]:
# Check whether the index named "person" exists.
es.indices.exists(index="person")

HeadApiResponse(True)

In [33]:
# Fetch document id 2 from "person"; the response holds the record plus its metadata.
res = es.get(index="person", id=2)

In [34]:
# Show the full get response (metadata and "_source").
res

ObjectApiResponse({'_index': 'person', '_id': '2', '_version': 1, '_seq_no': 1, '_primary_term': 1, 'found': True, '_source': {'Name': 'Gokul', 'occupation': 'DS'}})

In [35]:
# Show only the stored record ("_source"), without the surrounding metadata.
res["_source"]

{'Name': 'Gokul', 'occupation': 'DS'}

Search Queries and Matching Documents

In [38]:
# Example sentences used by the search cells.
doc_4 = dict(sentence="Every child likes an ice cream.")
doc_5 = dict(sentence="She swims every morning")
doc_6 = dict(sentence="Joe waited for the train.")
doc_7 = dict(sentence="I wait for Mary and Samantha at the bus station every morning.")

In [39]:
# Index the four example sentences into "english" under ids 4-7.
# One loop replaces four copy-pasted calls; the payload goes in `document=`,
# the 8.x client's parameter name (`body=` is the legacy spelling).
english_docs = {4: doc_4, 5: doc_5, 6: doc_6, 7: doc_7}
index_responses = {
    doc_id: es.index(index="english", id=doc_id, document=doc)
    for doc_id, doc in english_docs.items()
}

# Display the response of the final index call (id 7).
index_responses[7]

ObjectApiResponse({'_index': 'english', '_id': '7', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1})

In [59]:
# Fetch the document stored under id 4 in "english" and display the response.
res = es.get(index="english", id=4)
res

ObjectApiResponse({'_index': 'english', '_id': '4', '_version': 1, '_seq_no': 0, '_primary_term': 1, 'found': True, '_source': {'sentence': 'Every child likes an ice cream.'}})

In [61]:
# List every document of one index with a match_all query.
query = {
    "query": {"match_all": {}},
    "size": 1000,  # upper bound on the number of hits returned by this request
}

res = es.search(index="english", body=query)
res["hits"]

{'total': {'value': 4, 'relation': 'eq'},
 'max_score': 1.0,
 'hits': [{'_index': 'english',
   '_id': '4',
   '_score': 1.0,
   '_source': {'sentence': 'Every child likes an ice cream.'}},
  {'_index': 'english',
   '_id': '5',
   '_score': 1.0,
   '_source': {'sentence': 'She swims every morning'}},
  {'_index': 'english',
   '_id': '6',
   '_score': 1.0,
   '_source': {'sentence': 'Joe waited for the train.'}},
  {'_index': 'english',
   '_id': '7',
   '_score': 1.0,
   '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}

QUERY

In [46]:
# Match query: find sentences containing the word "every".
body = {
    "from": 0,  # offset of the first matching document to return
    "size": 3,  # maximum number of matching documents to return
    "query": {"match": {"sentence": "every"}},
}

In [47]:
# Run the query held in `body` against "english" and show the raw response.
res = es.search(index="english", body=body)
res

ObjectApiResponse({'took': 9, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': 0.42800996, 'hits': [{'_index': 'english', '_id': '5', '_score': 0.42800996, '_source': {'sentence': 'She swims every morning'}}, {'_index': 'english', '_id': '4', '_score': 0.37365946, '_source': {'sentence': 'Every child likes an ice cream.'}}, {'_index': 'english', '_id': '7', '_score': 0.27058095, '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}})

In [48]:
# Show only the "hits" section of the search response.
res["hits"]

{'total': {'value': 3, 'relation': 'eq'},
 'max_score': 0.42800996,
 'hits': [{'_index': 'english',
   '_id': '5',
   '_score': 0.42800996,
   '_source': {'sentence': 'She swims every morning'}},
  {'_index': 'english',
   '_id': '4',
   '_score': 0.37365946,
   '_source': {'sentence': 'Every child likes an ice cream.'}},
  {'_index': 'english',
   '_id': '7',
   '_score': 0.27058095,
   '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}

In [49]:
# Same match query for "every", but skipping the first two hits.
body = {
    "from": 2,  # offset of the first matching document to return
    "size": 4,  # maximum number of matching documents to return
    "query": {"match": {"sentence": "every"}},
}

In [50]:
# Run the offset query; the response holds the matching records plus metadata.
res = es.search(index="english", body=body)
res

ObjectApiResponse({'took': 5, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': 0.42800996, 'hits': [{'_index': 'english', '_id': '7', '_score': 0.27058095, '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}})

In [51]:
# Show only the "hits" section of the search response.
res["hits"]

{'total': {'value': 3, 'relation': 'eq'},
 'max_score': 0.42800996,
 'hits': [{'_index': 'english',
   '_id': '7',
   '_score': 0.27058095,
   '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}

In [56]:
# No "from"/"size" is given here, so the defaults apply (from = 0, size = 10).
query = {"query": {"match": {"sentence": "Every"}}}

# The response holds the matching records plus metadata; only its hits are shown.
res = es.search(index="english", body=query)
res["hits"]

{'total': {'value': 3, 'relation': 'eq'},
 'max_score': 0.42800996,
 'hits': [{'_index': 'english',
   '_id': '5',
   '_score': 0.42800996,
   '_source': {'sentence': 'She swims every morning'}},
  {'_index': 'english',
   '_id': '4',
   '_score': 0.37365946,
   '_source': {'sentence': 'Every child likes an ice cream.'}},
  {'_index': 'english',
   '_id': '7',
   '_score': 0.27058095,
   '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}

In [52]:
# Count how many documents the "english" index holds.
count_doc = es.count(index="english")
count_doc

ObjectApiResponse({'count': 4, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [53]:
# Count how many documents the "person" index holds.
count_doc = es.count(index="person")
count_doc

ObjectApiResponse({'count': 3, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

Combined Query

In [64]:
# Bool query combining two clauses:
#   must_not - exclude sentences that match the word "waited"
#   should   - a regexp clause on the same field
# Regexp patterns are applied to the indexed terms of the field. Lucene regular
# expressions have no ^ or $ anchors: a pattern always has to match a whole term,
# so ".*" accepts any term.
body = {
    "from": 0,
    "size": 4,
    "query": {
        "bool": {
            "must_not": {"match": {"sentence": "waited"}},
            "should": {"regexp": {"sentence": ".*"}},
        }
    },
}

res = es.search(index="english", body=body)
res["hits"]

{'total': {'value': 3, 'relation': 'eq'},
 'max_score': 1.0,
 'hits': [{'_index': 'english',
   '_id': '4',
   '_score': 1.0,
   '_source': {'sentence': 'Every child likes an ice cream.'}},
  {'_index': 'english',
   '_id': '5',
   '_score': 1.0,
   '_source': {'sentence': 'She swims every morning'}},
  {'_index': 'english',
   '_id': '7',
   '_score': 1.0,
   '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}}]}

Match & match_phrase

In [65]:
# Three sentences with the words quick/brown/fox in different arrangements,
# used to compare match and match_phrase.
doc_8 = dict(sentence="The quick brown fox jumps over the lazy dog")
doc_9 = dict(sentence="The brown fox is quick and jumps over the lazy dog")
doc_10 = dict(sentence="Quickly, the brown fox jumps over the dog")

In [66]:
# Index the three fox sentences into "english" under ids 8-10.
# One loop replaces three copy-pasted calls; the payload goes in `document=`,
# the 8.x client's parameter name (`body=` is the legacy spelling).
fox_docs = {8: doc_8, 9: doc_9, 10: doc_10}
fox_responses = {
    doc_id: es.index(index="english", id=doc_id, document=doc)
    for doc_id, doc in fox_docs.items()
}

# Display the response of the final index call (id 10).
fox_responses[10]

ObjectApiResponse({'_index': 'english', '_id': '10', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1})

In [73]:
# match: searches the sentence field for the words of "quick brown fox".
body = {
    "from": 0,
    "size": 10,
    "query": {"match": {"sentence": "quick brown fox"}},
}

res = es.search(index="english", body=body)
res["hits"]

{'total': {'value': 3, 'relation': 'eq'},
 'max_score': 2.6583266,
 'hits': [{'_index': 'english',
   '_id': '8',
   '_score': 2.6583266,
   '_source': {'sentence': 'The quick brown fox jumps over the lazy dog'}},
  {'_index': 'english',
   '_id': '9',
   '_score': 2.4204364,
   '_source': {'sentence': 'The brown fox is quick and jumps over the lazy dog'}},
  {'_index': 'english',
   '_id': '10',
   '_score': 1.6411504,
   '_source': {'sentence': 'Quickly, the brown fox jumps over the dog'}}]}

In [72]:
# match_phrase: searches the sentence field for the full phrase "quick brown fox".
body = {
    "from": 0,
    "size": 10,
    "query": {"match_phrase": {"sentence": "quick brown fox"}},
}

res = es.search(index="english", body=body)
res["hits"]

{'total': {'value': 1, 'relation': 'eq'},
 'max_score': 2.6583264,
 'hits': [{'_index': 'english',
   '_id': '8',
   '_score': 2.6583264,
   '_source': {'sentence': 'The quick brown fox jumps over the lazy dog'}}]}

In [77]:
# Regexp ".*" on the sentence field, returning up to 10 hits.
res = es.search(
    index="english",
    body={"from": 0, "size": 10, "query": {"regexp": {"sentence": ".*"}}},
)
res["hits"]

{'total': {'value': 7, 'relation': 'eq'},
 'max_score': 1.0,
 'hits': [{'_index': 'english',
   '_id': '4',
   '_score': 1.0,
   '_source': {'sentence': 'Every child likes an ice cream.'}},
  {'_index': 'english',
   '_id': '5',
   '_score': 1.0,
   '_source': {'sentence': 'She swims every morning'}},
  {'_index': 'english',
   '_id': '6',
   '_score': 1.0,
   '_source': {'sentence': 'Joe waited for the train.'}},
  {'_index': 'english',
   '_id': '7',
   '_score': 1.0,
   '_source': {'sentence': 'I wait for Mary and Samantha at the bus station every morning.'}},
  {'_index': 'english',
   '_id': '8',
   '_score': 1.0,
   '_source': {'sentence': 'The quick brown fox jumps over the lazy dog'}},
  {'_index': 'english',
   '_id': '9',
   '_score': 1.0,
   '_source': {'sentence': 'The brown fox is quick and jumps over the lazy dog'}},
  {'_index': 'english',
   '_id': '10',
   '_score': 1.0,
   '_source': {'sentence': 'Quickly, the brown fox jumps over the dog'}}]}

In [78]:
# Regexp "br.*" on the sentence field, returning up to 3 hits.
es.search(
    index="english",
    body={"from": 0, "size": 3, "query": {"regexp": {"sentence": "br.*"}}},
)

ObjectApiResponse({'took': 21, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'english', '_id': '8', '_score': 1.0, '_source': {'sentence': 'The quick brown fox jumps over the lazy dog'}}, {'_index': 'english', '_id': '9', '_score': 1.0, '_source': {'sentence': 'The brown fox is quick and jumps over the lazy dog'}}, {'_index': 'english', '_id': '10', '_score': 1.0, '_source': {'sentence': 'Quickly, the brown fox jumps over the dog'}}]}})