In [3]:
from elasticsearch import Elasticsearch
from pprint import pprint
import pandas as pd

es = Elasticsearch("http://localhost:9200")
client_info = es.info()
pprint(client_info.body)

{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'nv4JrjX8SLeHDApMSiNUPA',
 'name': '98617b9485a1',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-09-02T22:04:47.310170297Z',
             'build_flavor': 'default',
             'build_hash': '253e8544a65ad44581194068936f2a5d57c2c051',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.1'}}


### 1. Find the category with the highest rated products

In [7]:
highest_rated_categry_query = {
    "size": 0,
    "aggs": {
        "categories": {
            "terms": {"field": "category.keyword"},
            "aggs": {
                "avg_rating": {"avg": {"field": "rating"}}
            }
        }
    }
}

In [9]:
#execute query
response = es.search(index="fakestore", body=highest_rated_categry_query)

#Extract Data
highest_rated_category = sorted(
    [
        {"Category": bucket["key"], "Avg Rating": bucket["avg_rating"]["value"]}

        for bucket in response["aggregations"]["categories"]["buckets"]
    ],
    key = lambda x:x["Avg Rating"], reverse=True
)[0]  #Get the highest Rated Category

print("\n Category with Highest Rated Products:")
print(highest_rated_category)


 Category with Highest Rated Products:
{'Category': "men's clothing", 'Avg Rating': 3.6999999284744263}


### 2. Find the category with the most products

In [14]:
# Query to find the category with the most products

most_products_category_query = {
    "size": 0,
    "aggs": {
        "categories": {
            "terms": {"field": "category.keyword", "size": 10}
        }
    }
}

In [15]:
# execute query
response = es.search(index="fakestore", body=most_products_category_query)

#extract data

most_products_category = sorted(
    [
        {"Category": bucket["key"], "Product Count": bucket["doc_count"]}

        for bucket in response["aggregations"]["categories"]["buckets"]
    ],
    key = lambda x:x["Product Count"], reverse=True
)[0] # Get the category with the most products

print("Category with the most products:")
print(most_products_category)

Category with the most products:
{'Category': 'electronics', 'Product Count': 6}


### 3. Find the Category with the highest Review Count

In [16]:
# Query to find the category with the highest total review count
highest_review_count_category_query = {
    "size": 0,
    "aggs": {
        "categories": {
            "terms": {"field": "category.keyword"},
            "aggs": {
                "total_reviews": {"sum": {"field": "reviewCount"}}
            }
        }
    }
}

In [17]:
# Execute query
response = es.search(index="fakestore", body=highest_review_count_category_query)

pprint(response)

ObjectApiResponse({'took': 2, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 20, 'relation': 'eq'}, 'max_score': None, 'hits': []}, 'aggregations': {'categories': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, 'buckets': [{'key': 'electronics', 'doc_count': 6, 'total_reviews': {'value': 0.0}}, {'key': "women's clothing", 'doc_count': 6, 'total_reviews': {'value': 0.0}}, {'key': 'jewelery', 'doc_count': 4, 'total_reviews': {'value': 0.0}}, {'key': "men's clothing", 'doc_count': 4, 'total_reviews': {'value': 0.0}}]}}})


In [18]:
# extract data
highest_review_category = sorted(
    [
        {"Category": bucket["key"], "Total Reviews": bucket["total_reviews"]["value"]}

        for bucket in response["aggregations"]["categories"]["buckets"]
    ],

    key = lambda x:x["Total Reviews"], reverse = True
)[0]  #Get the category with the most reviews

print("Category with the highest review count:")
print(highest_review_category)

Category with the highest review count:
{'Category': 'electronics', 'Total Reviews': 0.0}
