### Building a custom component to perform sentiment analysis

This notebook will perform API calls against this entrypoint

https://rapidapi.com/beat-analytics-beat-analytics-default/api/yelp-business-reviews

You can sign up for a basic account at $0 per month - ensure to store your API key in an `.env` file under the name `RAPID_API_KEY` before using this notebook. 

In [1]:
import requests
from dotenv import load_dotenv
import os 
load_dotenv(".env")
RAPID_API_KEY = os.getenv("RAPID_API_KEY")


In [2]:
from haystack.components.routers import TransformersTextRouter

text_router = TransformersTextRouter(model="cardiffnlp/twitter-roberta-base-sentiment")
text_router.warm_up()

Device set to use mps


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

In [3]:
from haystack import component, Document
from typing import Any, Dict, List, Union

@component
class YelpReviewSentimentExtractor:
    @component.output_types(documents=List[Dict])
    def run(self, url: str, headers: Dict, querystring: Dict) -> List[Document]:
        """Extracts reviews from Yelp API and performs sentiment analysis on the reviews.
        
        Args:
            url (str): Yelp API URL.
            headers (Dict): Request headers.
            querystring (Dict): Request query parameters.
        
        Returns:
            List[Document]: List of documents with sentiment analysis results."""
        reviews = self._extract_reviews(url, headers, querystring)
        reviews_document = self._sentiment_analysis(reviews)
        
        return {"documents" : reviews_document}
    
    def _extract_reviews(self, url: str, headers: Dict, querystring: Dict) -> List[Dict]:
        """Extracts reviews from Yelp API.
        
        Args:
            url (str): Yelp API URL.
            headers (Dict): Request headers.
            querystring (Dict): Request query parameters.
            
        Returns:
            List[Dict]: List of reviews.
        """
        try:
            response = requests.get(url, headers=headers, params=querystring)

            results = response.json()
            
            reviews = [{"text":results['reviews'][i]['text'],
                        "rating": results['reviews'][i]['rating'],
                        "url": results['reviews'][i]['url']} \
                        for i in range(len(results['reviews']))]
            
            return reviews
        except Exception as e:
            return []
            print(e)
            
    def _sentiment_analysis(self, reviews: List[Dict]) -> List[Document]:
        """Performs sentiment analysis on the reviews.

        Args:
            reviews (List[Dict]): List of reviews.

        Returns:
            List[Document]: List of documents with sentiment analysis results.
        """
        sent_results = []

        for idx, query in enumerate(reviews):
            result = text_router.run(text=query['text'])
            document = Document(id=idx, 
                                content=query['text'],
                                meta= {"rating": query['rating'],
                                       "url": query['url'],
                                    "sentiment": {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}.get(next(iter(result)), "Unknown")}  # Mapping the label
                                )

            sent_results.append(document)
        return  sent_results



In [4]:
url = "https://yelp-business-reviews.p.rapidapi.com/reviews/RJNAeNA-209sctUO0dmwuA"
querystring = {"sortBy":"lowestRated"}

headers = {
	"x-rapidapi-key": RAPID_API_KEY,
	"x-rapidapi-host": "yelp-business-reviews.p.rapidapi.com"
}

extract_reviews = YelpReviewSentimentExtractor()
extract_reviews.run(url=url, headers=headers, querystring=querystring)



{'documents': [Document(id=697b73ea47fe6f5510d01f4aaea6e47ee0ab15ea8150ae43ad9e314cb493801d, content: 'Went in on a Tuesday for lunch. Wasn't overly busy,  cheese curds were great however.... Server didn...', meta: {'rating': 1, 'url': 'https://www.yelp.com/biz/RJNAeNA-209sctUO0dmwuA?hrid=pkO8UmZLWRI5Qfx0lJVRaQ', 'sentiment': 'positive'}),
  Document(id=1, content: 'I had not been to The Old Fashioned in about a decade. What happened?? The food was simply terrible,...', meta: {'rating': 1, 'url': 'https://www.yelp.com/biz/RJNAeNA-209sctUO0dmwuA?hrid=GUM7ePAhgOA4YRAFUEOkyQ', 'sentiment': 'negative'}),
  Document(id=2, content: 'We went midday around 2:30 in the afternoon on a Tuesday. Got drinks at the bar to wait for a table....', meta: {'rating': 1, 'url': 'https://www.yelp.com/biz/RJNAeNA-209sctUO0dmwuA?hrid=DK8kbUwWK2eui1zpV7weDw', 'sentiment': 'positive'}),
  Document(id=3, content: 'Service was okay, but probably the nastiest food I think I've ever had. If you like good food do no

In [None]:
querystring = {"sortBy":"highestRated"}
extract_reviews.run(url=url, headers=headers, querystring=querystring)