In [5]:
from elasticsearch import Elasticsearch
from transformers import pipeline
from dotenv import load_dotenv, find_dotenv
import os

load_dotenv(find_dotenv())

es = Elasticsearch(
    cloud_id = os.environ.get("CLOUD_ID"),
    api_key = os.environ.get("API_KEY")
)

sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", batch_size=16)

def get_business_id(business_name):
    query = {
        "query": {
            "match": {
                "name": business_name
            }
        }
    }
    result = es.search(index="business_data", body=query)
    # Retrieve the top matched business
    if result['hits']['total']['value'] > 0:
        business = result['hits']['hits'][0]['_source']
        return business['business_id'], business['name']
    else:
        return None, None

def get_reviews(business_id):
    query = {
        "query": {
            "term": {
                "business_id": business_id
            }
        }
    }
    result = es.search(index="business_review_data", body=query, size=1000)
    return [review['_source']['text'] for review in result['hits']['hits']]

def analyze_sentiments_batch(reviews):
    results = sentiment_analyzer(reviews)    
    for i, review in enumerate(reviews):
        results[i]['text'] = review    
    return results

def classify_reviews_batch(reviews):
    sentiments = analyze_sentiments_batch(reviews)
    
    positive_reviews = [r for r in sentiments if r['label'] == 'POSITIVE']
    negative_reviews = [r for r in sentiments if r['label'] == 'NEGATIVE']
    
    top_positive_reviews = sorted(positive_reviews, key=lambda x: x['score'], reverse=True)[:3]
    top_negative_reviews = sorted(negative_reviews, key=lambda x: x['score'], reverse=True)[:3]
    
    return top_positive_reviews, top_negative_reviews

def process_business_reviews(business_name):
    business_id, business_display_name = get_business_id(business_name)
    
    if business_id is None:
        print(f"No business found for name: {business_name}")
        return
    
    reviews = get_reviews(business_id)
    
    if not reviews:
        print(f"No reviews found for business: {business_display_name}")
        return
    
    top_positive, top_negative = classify_reviews_batch(reviews)
    
    print(f"Business: {business_display_name}")
    
    print("\nTop 3 Positive Reviews:")
    for review in top_positive:
        print(f"Review: {review['text']}\nScore: {review['score']}\n")
    
    print("\nTop 3 Negative Reviews:")
    for review in top_negative:
        print(f"Review: {review['text']}\nScore: {review['score']}\n")

business_name = "Imo's Pizza"  
process_business_reviews(business_name)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
  result = es.search(index="business_review_data", body=query, size=1000)


Business: Imo's Pizza

Top 3 Positive Reviews:
Review: Wasn't sure what to expect for never having St Louis  style pizza before.  My fiancé and I came
In and were genuinely greet by Joshilin.  She was fantastic and gave great recommendations.  The 10 inch deluxe was perfect abs the pizza was unique and very good.  Thanks St. Louis Imo's
Score: 0.9998607635498047

Review: The staff was phenomenal, extremely nice. We enjoyed the affordable prices and large portions. The food was good, but we were not blown away. There are also signs along the road to help you find your way and they're in close proximity to the City Museum.
Score: 0.9997757077217102

Review: What a hidden gem!!  We asked someone for a recommendation and they told us Imo's.  A little hard to find because the sign is small, but it was great.  I had a little personal pan pizza and the hubby got a meatball sub.  Both were good.  The staff was so nice.  We were the only 2 in there because there was a festival at the park, but 