In [1]:
from src.faker_class import FakerWrapper
from src.elasticsearch_client import ElasticsearchClient
from src.logger import Logger
from data.index_mapping import *

# Initialize logger
logger = Logger(__name__)

INDEX_NAME = "movie_reviews"


In [2]:
es = ElasticsearchClient()

# delete index
es.delete_index(index_name=INDEX_NAME)

2024-05-28 13:47:41,730 - src.elasticsearch_client - INFO - [0;32mConnected to Elasticsearch cluster[0m
2024-05-28 13:47:41,734 - src.elasticsearch_client - ERROR - [0;31mIndex or document not found: NotFoundError(404, 'index_not_found_exception', 'no such index [movie_reviews]', movie_reviews, index_or_alias)[0m


In [3]:
es.create_index("movie_reviews", mapping=movie_review_mapping)

2024-05-28 13:47:57,071 - src.elasticsearch_client - INFO - [0;32mCreated index movie_reviews[0m


True

In [4]:
fw = FakerWrapper()
sample_data_generator = fw.generate_data(mapping=fw.mapping, num_documents=10_000)


2024-05-28 13:49:10,004 - src.faker_class - INFO - [0;32mInitialize FakerWrapper Class[0m


In [5]:
es.insert_one_document(INDEX_NAME, body=next(sample_data_generator))
es.insert_one_document(INDEX_NAME, body=next(sample_data_generator), doc_id=22)


2024-05-28 13:50:10,848 - src.elasticsearch_client - INFO - [0;32minsert one document to movie_reviews with body: {'movie_title': 'Sing compare if gas brother exist seat.', 'director': 'Adam Clark', 'main_actor': 'David Ramirez', 'genre': 'Action', 'release_date': datetime.date(2008, 6, 12), 'users_rating': 8, 'imdb_rating': 1}[0m
2024-05-28 13:50:10,915 - src.elasticsearch_client - INFO - [0;32minsert one document to movie_reviews with body: {'movie_title': 'West join none such.', 'director': 'Kenneth Fernandez', 'main_actor': 'Christine Alexander DVM', 'genre': 'Action', 'release_date': datetime.date(2014, 4, 3), 'users_rating': 1, 'imdb_rating': 5}[0m


True

In [6]:
es.bulk_index_documents(index_name=INDEX_NAME, documents=[next(sample_data_generator) for _ in range(10)])

2024-05-28 13:50:29,892 - src.elasticsearch_client - INFO - [0;32mBulk insert (10, []) documents to movie_reviews index[0m


In [7]:
document = es.get_document(index_name=INDEX_NAME, doc_id=22)
logger.info(f"document with id 22: {document}")

2024-05-28 13:50:43,382 - src.elasticsearch_client - INFO - [0;32mGetting document from index: movie_reviews with id: 22[0m
2024-05-28 13:50:43,384 - __main__ - INFO - [0;32mdocument with id 22: {'movie_title': 'West join none such.', 'director': 'Kenneth Fernandez', 'main_actor': 'Christine Alexander DVM', 'genre': 'Action', 'release_date': '2014-04-03', 'users_rating': 1, 'imdb_rating': 5}[0m


In [8]:
es.delete_document(index_name=INDEX_NAME, doc_id=22)

2024-05-28 13:50:58,812 - src.elasticsearch_client - INFO - [0;32mDelete document 22 from movie_reviews index[0m


True

In [9]:
es.count(index_name=INDEX_NAME)

2024-05-28 13:51:01,757 - src.elasticsearch_client - INFO - [0;32mCount executed on index movie_reviews, 11 documents![0m


11

In [10]:
es.delete_by_query(query={"match": {"genre": "Drama"}}, index_name=INDEX_NAME)

2024-05-28 13:51:21,638 - src.elasticsearch_client - INFO - [0;32mDeleted documents from index movie_reviews that match query {'match': {'genre': 'Drama'}}[0m


In [12]:
query = {
            "bool": {
                "must": [
                    {"match": {"genre": "Romance"}},
                    {"range": {"rating": {"gte": 4}}}
                ]
            }
        }

result = es.search(index_name=INDEX_NAME, query=query)
logger.info(f"result of search: {len(result)} documents")

2024-05-28 13:52:08,336 - src.elasticsearch_client - INFO - [0;32mSearch executed on index movie_reviews with query {'bool': {'must': [{'match': {'genre': 'Romance'}}, {'range': {'rating': {'gte': 4}}}]}}[0m
2024-05-28 13:52:08,340 - __main__ - INFO - [0;32mresult of search: 0 documents[0m


In [13]:
for hit in result:
    logger.info(f"document: {hit}")