Name: Pai Northern Thai Kitchen

In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _first_token(text):
    """Return the first whitespace-separated token of *text*, or "Unknown" if there is none."""
    tokens = text.split() if text else []
    return tokens[0] if tokens else "Unknown"


def scrape_yelp(url, timeout=10):
    """Scrape one Yelp business page into a DataFrame with one row per review.

    Args:
        url: Address of the Yelp business page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Restaurant Name``,
        ``Total Reviews``, ``Review Text``, ``Reviewer`` and ``Rating``.
        Any field whose element is not found on the page is filled with the
        string ``"Unknown"``. If no review containers are found the frame has
        these columns and zero rows.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error response instead of parsing the error page as if
    # it were a restaurant listing and returning an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): the CSS class names used below look auto-generated and may
    # no longer match the live page markup - confirm before relying on them.

    # Extract the name of the restaurant (first <h1> on the page)
    restaurant_name_tag = soup.find('h1')
    restaurant_name = restaurant_name_tag.text if restaurant_name_tag is not None else "Unknown"

    # Extract the total number of reviews (first token of the matching <span>)
    total_reviews_tag = soup.find('span', class_='css-bq71j2')
    total_reviews = (
        _first_token(total_reviews_tag.text) if total_reviews_tag is not None else "Unknown"
    )

    # Extract individual reviews, reviewers, and ratings
    review_texts = []
    reviewers = []
    ratings = []

    for review in soup.find_all('div', class_='review__373c0__13kpL'):
        review_text_tag = review.find('span', class_='raw__373c0__3rcx7')
        reviewer_tag = review.find('a', class_='css-166la90')
        rating_tag = review.find('div', class_='i-stars__373c0__1T6rz')

        review_texts.append(review_text_tag.text if review_text_tag is not None else "Unknown")
        reviewers.append(reviewer_tag.text if reviewer_tag is not None else "Unknown")
        # .get() avoids a KeyError when the stars element carries no
        # aria-label; _first_token covers a missing or empty label.
        ratings.append(
            _first_token(rating_tag.get('aria-label')) if rating_tag is not None else "Unknown"
        )

    # Store the data in a DataFrame and return it; the page-level fields are
    # repeated so every review row carries them.
    row_count = len(review_texts)
    return pd.DataFrame({
        'Restaurant Name': [restaurant_name] * row_count,
        'Total Reviews': [total_reviews] * row_count,
        'Review Text': review_texts,
        'Reviewer': reviewers,
        'Rating': ratings,
    })

if __name__ == "__main__":
    # Yelp business pages to scrape; the reviews from all of them end up in
    # a single CSV file.
    urls = [
        "https://www.yelp.ca/biz/pai-northern-thai-kitchen-toronto-5?osq=Restaurant",
        # Add more URLs here if needed
    ]

    # Gather one frame per URL and concatenate once at the end. Calling
    # pd.concat inside the loop re-copies every accumulated row on each
    # iteration.
    frames = []
    for url in urls:
        df = scrape_yelp(url)
        frames.append(df)

    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame when there are no URLs.
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    all_data.to_csv('yelp_reviews.csv', index=False)
    print("Data scraped and saved to yelp_reviews.csv")


Data scraped and saved to yelp_reviews.csv
