In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Endpoint of the local Elasticsearch node this notebook talks to.
es_url = "http://localhost:9200"
es = Elasticsearch(es_url)

# info() queries the server, so the confirmation below is only printed
# once that request has returned without raising.
client_info = es.info()
print("Connected to Elasticsearch")
pprint(client_info.body)

Connected to Elasticsearch
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'nv4JrjX8SLeHDApMSiNUPA',
 'name': '98617b9485a1',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-09-02T22:04:47.310170297Z',
             'build_flavor': 'default',
             'build_hash': '253e8544a65ad44581194068936f2a5d57c2c051',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.1'}}


### Average Price Per Category
- ` Find the most expensive and cheapest product categories by calculating the average price of each category `

In [2]:
# Average price per category.
# Metric computed inside every category bucket: the mean of the "price" field.
mean_price_metric = {"avg": {"field": "price"}}

avg_price_query = {
    # size 0: return aggregation results only, no document hits.
    "size": 0,
    "aggs": {
        "avg_price_per_category": {
            # One bucket per distinct value of the category keyword field.
            "terms": {"field": "category.keyword"},
            "aggs": {"average_price": mean_price_metric},
        },
    },
}

In [4]:
# Run the average-price aggregation on the "fakestore" index and display only
# the per-category buckets instead of the full response.
response = es.search(index="fakestore", body=avg_price_query)
price_buckets = response["aggregations"]["avg_price_per_category"]["buckets"]
pprint(price_buckets)

[{'average_price': {'value': 332.4983317057292},
  'doc_count': 6,
  'key': 'electronics'},
 {'average_price': {'value': 26.286667346954346},
  'doc_count': 6,
  'key': "women's clothing"},
 {'average_price': {'value': 220.99499988555908},
  'doc_count': 4,
  'key': 'jewelery'},
 {'average_price': {'value': 51.057499408721924},
  'doc_count': 4,
  'key': "men's clothing"}]


### Top rated products
- ` Find the products with the highest ratings `

In [5]:
# Keep only documents that actually carry a value for "rating".
has_rating = {"exists": {"field": "rating"}}
# Highest rating first.
by_rating_desc = [{"rating": {"order": "desc"}}]

# Top five rated products.
top_rated_query = {
    "size": 5,
    "query": has_rating,
    "sort": by_rating_desc,
}

In [6]:
# Fetch the top-rated products from "fakestore" and show the matching
# documents (the inner "hits" list of the response).
response = es.search(index="fakestore", body=top_rated_query)
top_rated_hits = response["hits"]["hits"]
pprint(top_rated_hits)

[{'_id': 'OBMbyJUBmMXztNkVHsU5',
  '_ignored': ['description.keyword'],
  '_index': 'fakestore',
  '_score': None,
  '_source': {'category': 'electronics',
              'description': '3D NAND flash are applied to deliver high '
                             'transfer speeds Remarkable transfer speeds that '
                             'enable faster bootup and improved overall system '
                             'performance. The advanced SLC Cache Technology '
                             'allows performance boost and longer lifespan 7mm '
                             'slim design suitable for Ultrabooks and '
                             'Ultra-slim notebooks. Supports TRIM command, '
                             'Garbage Collection technology, RAID, and ECC '
                             '(Error Checking & Correction) to provide the '
                             'optimized performance and enhanced reliability.',
              'id': 11,
              'image': 'https://fakestorea

### Total ratings Count per Category
- ` Measure customer engagement by counting the total number of reviews per category `

In [9]:
# Metric computed inside every category bucket: the sum of "review_count".
review_sum_metric = {"sum": {"field": "review_count"}}

# Total number of reviews per category.
rating_count_query = {
    # size 0: return aggregation results only, no document hits.
    "size": 0,
    "aggs": {
        "total_ratings_per_category": {
            # One bucket per distinct value of the category keyword field.
            "terms": {"field": "category.keyword"},
            "aggs": {"total_reviews": review_sum_metric},
        },
    },
}

In [10]:
# Run the review-count aggregation on "fakestore" and display the
# per-category buckets with their summed review totals.
response = es.search(index="fakestore", body=rating_count_query)
review_buckets = response["aggregations"]["total_ratings_per_category"]["buckets"]
pprint(review_buckets)

[{'doc_count': 6, 'key': 'electronics', 'total_reviews': {'value': 1782.0}},
 {'doc_count': 6,
  'key': "women's clothing",
  'total_reviews': {'value': 1675.0}},
 {'doc_count': 4, 'key': 'jewelery', 'total_reviews': {'value': 970.0}},
 {'doc_count': 4, 'key': "men's clothing", 'total_reviews': {'value': 1309.0}}]


### Count of Products by Category
- ` Find out how many products exist in each category ` 

In [16]:
# Bucket documents by the category keyword field; each bucket's doc_count
# is the number of products in that category.
category_terms = {"terms": {"field": "category.keyword"}}

# Number of products per category.
product_count_query = {
    # size 0: return aggregation results only, no document hits.
    "size": 0,
    "aggs": {"products_per_category": category_terms},
}

In [17]:
# Run the product-count aggregation on "fakestore" and display one bucket
# per category.
response = es.search(index="fakestore", body=product_count_query)
count_buckets = response["aggregations"]["products_per_category"]["buckets"]
pprint(count_buckets)

[{'doc_count': 6, 'key': 'electronics'},
 {'doc_count': 6, 'key': "women's clothing"},
 {'doc_count': 4, 'key': 'jewelery'},
 {'doc_count': 4, 'key': "men's clothing"}]
