In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")
client_info = es.info()
print("Elasticsearch Client Information:")
pprint(client_info)


Elasticsearch Client Information:
ObjectApiResponse({'name': '82c3cca05ad1', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'PuXwAaOSSK-vVPeGsKp1QA', 'version': {'number': '8.15.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179', 'build_date': '2024-08-05T10:05:34.233336849Z', 'build_snapshot': False, 'lucene_version': '9.11.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})




In [2]:
es.indices.delete(index="deleted_document", ignore_unavailable=True)
es.indices.create(
    index="deleted_document",
    mappings={
        "properties": {
            "suggestion": {
                "type": "completion",
                "analyzer": "simple",
                "search_analyzer": "simple"
            },
            "category": {"type": "keyword"},
            "timestamp": {"type": "date"}  # ISO format: YYYY-MM-DD
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'deleted_document'})

In [3]:
docs = [
    {
        "_index": "deleted_document",
        "_id": "1",
        "_source": {
            "suggestion": {"input": ["Spider-Man"]},
            "category": "movie",
            "timestamp": "2022-01-01"
        }
    },
    {
        "_index": "deleted_document",
        "_id": "2",
        "_source": {
            "suggestion": {"input": ["Spirited Away"]},
            "category": "anime",
            "timestamp": "2023-01-01"
        }
    },
    {
        "_index": "deleted_document",
        "_id": "3",
        "_source": {
            "suggestion": {"input": ["Split"]},
            "category": "thriller",
            "timestamp": "2021-01-01"
        }
    }
]


In [5]:
def insert_documents(docs):
    for doc in docs:
        response = es.index(index=doc["_index"], id=doc["_id"], document=doc["_source"])
        print(f"Document indexed successfully: {response['result']}")

In [6]:
insert_documents(docs)

Document indexed successfully: created
Document indexed successfully: created
Document indexed successfully: created


In [7]:
print("\n📄 All documents before deletion:")
res = es.search(index="deleted_document", query={"match_all": {}})
for hit in res["hits"]["hits"]:
    pprint(hit)



📄 All documents before deletion:
{'_id': '1',
 '_index': 'deleted_document',
 '_score': 1.0,
 '_source': {'category': 'movie',
             'suggestion': {'input': ['Spider-Man']},
             'timestamp': '2022-01-01'}}
{'_id': '2',
 '_index': 'deleted_document',
 '_score': 1.0,
 '_source': {'category': 'anime',
             'suggestion': {'input': ['Spirited Away']},
             'timestamp': '2023-01-01'}}
{'_id': '3',
 '_index': 'deleted_document',
 '_score': 1.0,
 '_source': {'category': 'thriller',
             'suggestion': {'input': ['Split']},
             'timestamp': '2021-01-01'}}


In [8]:
print("\n❌ Deleting document with ID = '3' (Split)")
es.delete(index="deleted_document", id="3", ignore=[404])



❌ Deleting document with ID = '3' (Split)


  es.delete(index="deleted_document", id="3", ignore=[404])


ObjectApiResponse({'_index': 'deleted_document', '_id': '3', '_version': 2, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1})

In [9]:
print("\n📄 Documents after deleting ID '3':")
res = es.search(index="deleted_document", query={"match_all": {}})
for hit in res["hits"]["hits"]:
    pprint(hit)




📄 Documents after deleting ID '3':
{'_id': '1',
 '_index': 'deleted_document',
 '_score': 1.0,
 '_source': {'category': 'movie',
             'suggestion': {'input': ['Spider-Man']},
             'timestamp': '2022-01-01'}}
{'_id': '2',
 '_index': 'deleted_document',
 '_score': 1.0,
 '_source': {'category': 'anime',
             'suggestion': {'input': ['Spirited Away']},
             'timestamp': '2023-01-01'}}


In [10]:
print("\n🧹 Deleting documents with timestamp < 2023...")
es.delete_by_query(
    index="deleted_document",
    body={
        "query": {
            "range": {
                "timestamp": {
                    "lt": "2023-01-01"
                }
            }
        }
    }
)


🧹 Deleting documents with timestamp < 2023...


ObjectApiResponse({'took': 114, 'timed_out': False, 'total': 1, 'deleted': 1, 'batches': 1, 'version_conflicts': 0, 'noops': 0, 'retries': {'bulk': 0, 'search': 0}, 'throttled_millis': 0, 'requests_per_second': -1.0, 'throttled_until_millis': 0, 'failures': []})

In [11]:
print("\n✅ Final documents after query-based deletion:")
res = es.search(index="deleted_document", query={"match_all": {}})
for hit in res["hits"]["hits"]:
    pprint(hit)



✅ Final documents after query-based deletion:
{'_id': '2',
 '_index': 'deleted_document',
 '_score': 1.0,
 '_source': {'category': 'anime',
             'suggestion': {'input': ['Spirited Away']},
             'timestamp': '2023-01-01'}}
