In [1]:
from pprint import pprint 
from elasticsearch import Elasticsearch 

# Address of the Elasticsearch node this notebook talks to.
ES_URL = 'http://localhost:9200'

# Build the client, then fetch the cluster metadata. The success message is
# printed only after info() has returned, followed by the metadata itself.
es = Elasticsearch(ES_URL)
client_info = es.info()
print('Connected to Elasticsearch!')
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'tdRTedevS6WZDfd1oEOoeQ',
 'name': 'd3f17afee3cf',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-01-10T10:08:26.972230187Z',
             'build_flavor': 'default',
             'build_hash': 'd4b391d925c31d262eb767b8b2db8f398103f909',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.12.0',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.17.1'}}


In [2]:
# Recreate the index from scratch so the cell can be re-run safely:
# ignore_unavailable=True keeps the delete from failing when the index
# does not exist yet. The create call stays last so its response is the
# cell's displayed output.
PRODUCTS_INDEX = 'products'
es.indices.delete(index=PRODUCTS_INDEX, ignore_unavailable=True)
es.indices.create(index=PRODUCTS_INDEX)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'products'})

In [5]:
import json
from tqdm import tqdm

# Read the sample documents. The context manager closes the file handle as
# soon as parsing is done (the previous bare open() never closed it), and the
# explicit encoding avoids depending on the platform's default locale.
with open("data/dummy_data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# Index every document into `products`; no explicit id is passed to es.index.
# tqdm derives the progress-bar total from len(dummy_data) on its own.
for document in tqdm(dummy_data):
    response = es.index(index='products', body=document)

100%|██████████| 3/3 [00:00<00:00, 31.90it/s]


### Leaf clauses 

In [6]:
# Term query on the `created_on` field for the value 2024-09-22.
term_clause = {"term": {"created_on": "2024-09-22"}}
response = es.search(index='products', body={"query": term_clause})

# The number of matches is read from hits.total.value in the response.
n_hits = response['hits']['total']['value']
print(f"Found {n_hits} documents in products")

Found 1 documents in products


In [None]:
# Pull the list of hit records out of the latest search response; the bare
# name on the last line makes the notebook display it.
retrived_documents = response["hits"]["hits"]
retrived_documents

[{'_index': 'products',
  '_id': 'CkXV-JQBqAMUwyyAyxUk',
  '_score': 1.0,
  '_source': {'title': 'Sample Title 1',
   'text': 'This is the first sample document text.',
   'created_on': '2024-09-22'}}]

In [8]:
# Match query: search the `text` field for "document".
match_clause = {"match": {"text": "document"}}
response = es.search(index='products', body={"query": match_clause})

# The number of matches is read from hits.total.value in the response.
n_hits = response['hits']['total']['value']
print(f"Found {n_hits} documents in products")

Found 3 documents in products


In [9]:
# Pull the list of hit records out of the latest search response; the bare
# name on the last line makes the notebook display it.
retrived_documents = response["hits"]["hits"]
retrived_documents

[{'_index': 'products',
  '_id': 'CkXV-JQBqAMUwyyAyxUk',
  '_score': 0.13606146,
  '_source': {'title': 'Sample Title 1',
   'text': 'This is the first sample document text.',
   'created_on': '2024-09-22'}},
 {'_index': 'products',
  '_id': 'C0XV-JQBqAMUwyyAyxVg',
  '_score': 0.13606146,
  '_source': {'title': 'Sample Title 2',
   'text': 'Here is another example of a document.',
   'created_on': '2024-09-24'}},
 {'_index': 'products',
  '_id': 'DEXV-JQBqAMUwyyAyxV2',
  '_score': 0.12874341,
  '_source': {'title': 'Sample Title 3',
   'text': 'The content of the third document goes here.',
   'created_on': '2024-09-24'}}]

In [10]:
# Range query: `created_on` bounded above by 2024-09-23 (lte), no lower bound.
range_clause = {"range": {"created_on": {"lte": "2024-09-23"}}}
response = es.search(index='products', body={"query": range_clause})

# The number of matches is read from hits.total.value in the response.
n_hits = response['hits']['total']['value']
print(f"Found {n_hits} documents in products")

Found 1 documents in products


In [11]:
# Pull the list of hit records out of the latest search response; the bare
# name on the last line makes the notebook display it.
retrived_documents = response["hits"]["hits"]
retrived_documents

[{'_index': 'products',
  '_id': 'CkXV-JQBqAMUwyyAyxUk',
  '_score': 1.0,
  '_source': {'title': 'Sample Title 1',
   'text': 'This is the first sample document text.',
   'created_on': '2024-09-22'}}]

### Compound clauses

In [13]:
# Bool query combining two leaf clauses under `must`:
#   - a match on `text` for "third"
#   - a range on `created_on` with identical gte/lte bounds (2024-09-24)
text_clause = {"match": {"text": "third"}}
date_clause = {
    "range": {
        "created_on": {"gte": "2024-09-24", "lte": "2024-09-24"}
    }
}
bool_clause = {"bool": {"must": [text_clause, date_clause]}}
response = es.search(index='products', body={"query": bool_clause})

# The number of matches is read from hits.total.value in the response.
n_hits = response['hits']['total']['value']
print(f"Found {n_hits} documents in products")

Found 1 documents in products


In [14]:
# Pull the list of hit records out of the latest search response; the bare
# name on the last line makes the notebook display it.
retrived_documents = response["hits"]["hits"]
retrived_documents

[{'_index': 'products',
  '_id': 'DEXV-JQBqAMUwyyAyxV2',
  '_score': 1.94566,
  '_source': {'title': 'Sample Title 3',
   'text': 'The content of the third document goes here.',
   'created_on': '2024-09-24'}}]