In [1]:
# installing elastic search
# https://www.elastic.co/guide/en/elasticsearch/reference/current/targz.html

# wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.16.2-darwin-x86_64.tar.gz
# wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.16.2-darwin-x86_64.tar.gz.sha512
# shasum -a 512 -c elasticsearch-7.16.2-darwin-x86_64.tar.gz.sha512 
# tar -xzf elasticsearch-7.16.2-darwin-x86_64.tar.gz
# cd elasticsearch-7.16.2/ 

# Triggering/executing elastic search:
# ./bin/elasticsearch

# installing elastic search python client
# !pip install elasticsearch

Collecting elasticsearch
  Downloading elasticsearch-7.16.2-py2.py3-none-any.whl (385 kB)
[K     |████████████████████████████████| 385 kB 10 kB/s eta 0:00:0146
Installing collected packages: elasticsearch
Successfully installed elasticsearch-7.16.2


In [5]:
from elasticsearch import Elasticsearch

In [6]:
es = Elasticsearch()
# es = Elasticsearch(HOST="http://localhost", PORT=9200)

In [10]:
# creating an index

if es.indices.exists(index='first_index'):
    es.indices.delete(index='first_index')

es.indices.create(index='first_index')

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'first_index'}

In [9]:
es.indices.exists(index='first_index')



True

In [11]:
# inserting data

doc_1 = {'city': 'Paris', 'country': 'France'}
doc_2 = {'city': 'Vienna', 'country': 'Austria'}
doc_3 = {'city': 'London', 'country': 'England'}

In [12]:
es.index(index='cities', doc_type='places', id=1, body=doc_1)

  es.index(index='cities', doc_type='places', id=1, body=doc_1)


{'_index': 'cities',
 '_type': 'places',
 '_id': '1',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

In [13]:
es.index(index='cities', doc_type='places', id=2, body=doc_2)

  es.index(index='cities', doc_type='places', id=2, body=doc_2)


{'_index': 'cities',
 '_type': 'places',
 '_id': '2',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 1,
 '_primary_term': 1}

In [14]:
es.index(index='cities', doc_type='places', id=3, body=doc_3)

  es.index(index='cities', doc_type='places', id=3, body=doc_3)


{'_index': 'cities',
 '_type': 'places',
 '_id': '3',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 2,
 '_primary_term': 1}

In [15]:
# getting/reading the data

res = es.get(index='cities', doc_type='places', id=1)
res

{'_index': 'cities', '_type': 'places', '_id': '1', '_version': 1, '_seq_no': 0, '_primary_term': 1, 'found': True, '_source': {'city': 'Paris', 'country': 'France'}}




In [17]:
res = es.get(index='cities', doc_type='places', id=2)
res



{'_index': 'cities',
 '_type': 'places',
 '_id': '2',
 '_version': 1,
 '_seq_no': 1,
 '_primary_term': 1,
 'found': True,
 '_source': {'city': 'Vienna', 'country': 'Austria'}}

In [18]:
res = es.get(index='cities', doc_type='places', id=3)
res

{'_index': 'cities',
 '_type': 'places',
 '_id': '3',
 '_version': 1,
 '_seq_no': 2,
 '_primary_term': 1,
 'found': True,
 '_source': {'city': 'London', 'country': 'England'}}

In [19]:
res['_source']

{'city': 'London', 'country': 'England'}

In [20]:
# search query and matching documents

search_1 = {"sentence": "Hack COVID-19 will be over soon"}
search_2 = {"sentence": "Hack-Qurantine will get away soon"}

es.index(index='english', doc_type='sentences', id=1, body=search_1)
es.index(index='english', doc_type='sentences', id=2, body=search_2)

  es.index(index='english', doc_type='sentences', id=1, body=search_1)
  es.index(index='english', doc_type='sentences', id=2, body=search_2)


{'_index': 'english',
 '_type': 'sentences',
 '_id': '2',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 1,
 '_primary_term': 1}

In [21]:
# match query

body = {
    "from": 0,
    "size": 0,
    "query": {
        "match": {
            "sentence": "Hack"
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 11,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [22]:
# changed size to 2
body = {
    "from": 0,
    "size": 2,
    "query": {
        "match": {
            "sentence": "Hack"
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 7,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.18824537,
  'hits': [{'_index': 'english',
    '_type': 'sentences',
    '_id': '2',
    '_score': 0.18824537,
    '_source': {'sentence': 'Hack-Qurantine will get away soon'}},
   {'_index': 'english',
    '_type': 'sentences',
    '_id': '1',
    '_score': 0.17675921,
    '_source': {'sentence': 'Hack COVID-19 will be over soon'}}]}}

In [23]:

body = {
    "from": 0,
    "size": 2,
    "query": {
        "match": {
            "sentence": "Hack Qurantine"
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 12,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.9039135,
  'hits': [{'_index': 'english',
    '_type': 'sentences',
    '_id': '2',
    '_score': 0.9039135,
    '_source': {'sentence': 'Hack-Qurantine will get away soon'}},
   {'_index': 'english',
    '_type': 'sentences',
    '_id': '1',
    '_score': 0.17675921,
    '_source': {'sentence': 'Hack COVID-19 will be over soon'}}]}}

In [25]:
# changed size to 10
body = {
    "from": 0,
    "size": 10,
    "query": {
        "match": {
            "sentence": "Hack Qurantine"
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 5,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.9039135,
  'hits': [{'_index': 'english',
    '_type': 'sentences',
    '_id': '2',
    '_score': 0.9039135,
    '_source': {'sentence': 'Hack-Qurantine will get away soon'}},
   {'_index': 'english',
    '_type': 'sentences',
    '_id': '1',
    '_score': 0.17675921,
    '_source': {'sentence': 'Hack COVID-19 will be over soon'}}]}}

In [26]:
# changed size to 1
body = {
    "from": 0,
    "size": 1,
    "query": {
        "match": {
            "sentence": "Hack Qurantine"
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.9039135,
  'hits': [{'_index': 'english',
    '_type': 'sentences',
    '_id': '2',
    '_score': 0.9039135,
    '_source': {'sentence': 'Hack-Qurantine will get away soon'}}]}}

In [27]:
# match phrase
body = {
    "from": 0,
    "size": 10,
    "query": {
        "match_phrase": {
            "sentence": "Hack Qurantine"
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 12,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.9039135,
  'hits': [{'_index': 'english',
    '_type': 'sentences',
    '_id': '2',
    '_score': 0.9039135,
    '_source': {'sentence': 'Hack-Qurantine will get away soon'}}]}}

In [28]:
# combining queries

body = {
    "from": 0,
    "size": 10,
    "query": {
        "bool": {
            "must_not": {
                "match": {
                    "sentence": "COVID"
                }
            },
            "should": {
                "match": {
                    "sentence": "Hack"
                }
            }
        }
    }
}

res = es.search(index='english', body=body)
res

  res = es.search(index='english', body=body)


{'took': 5,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.18824537,
  'hits': [{'_index': 'english',
    '_type': 'sentences',
    '_id': '2',
    '_score': 0.18824537,
    '_source': {'sentence': 'Hack-Qurantine will get away soon'}}]}}

In [None]:
# regular expression queries

