# Web Scraping Yelp: Kate's Seafood

Author: Megan Schaeb  
Date: 8/14/2023

In [1]:
import requests
import bs4

## Scraping Function

In [2]:
def scrape_page(soup, all_reviews, page):
    '''
    Scrape current page of reviews

    Every review block found on the page is stored in all_reviews under the
    key "Review <n>", where n = 1 + (position on the page) + (page * 10).
    A field whose element cannot be found in a review block is stored as
    None instead of aborting the scrape.

    Input
    -----
    soup: parsed text for current page
    all_reviews: dictionary of reviews that have been processed (updated in place)
    page: integer representing the current page of reviews (0 for the first page)

    Return
    ------
    all_reviews: the same dictionary, with this page's reviews added
    '''

    # isolate review section of page
    # NOTE: the reviews are taken from the 8th matching <ul>; this index and
    # the class names below depend on the page layout and raise IndexError
    # if the page has fewer matching lists
    candidate_lists = soup.find_all("ul", {"class": "undefined list__09f24__ynIEd"})
    review_section = candidate_lists[7]

    # isolate individual reviews
    review_blocks = review_section.find_all("li", {"class": "margin-b5__09f24__pTvws border-color--default__09f24__NPAKY"})

    # pull review information
    for i, block in enumerate(review_blocks):
        rating_tag = block.find("div", {"role": "img"})
        review_info = {
            "reviewer_name": _tag_text(block.find("a", {"class": "css-19v1rkv", "role": "link"})),
            "reviewer_loc": _tag_text(block.find("span", {"class": "css-qgunke"})),
            "review_date": _tag_text(block.find("span", {"class": "css-chan6m"})),
            "star_rating": None if rating_tag is None else rating_tag.get("aria-label"),
            # only spans marked lang="en" match, so other reviews yield None here
            "review_text": _tag_text(block.find("span", {"class": "raw__09f24__T4Ezm", "lang": "en"})),
        }
        all_reviews["Review {num:.0f}".format(num=(1 + i + (page * 10)))] = review_info

    return all_reviews


def _tag_text(tag):
    '''Return the text of a found element, or None when find() matched nothing'''
    return None if tag is None else tag.text


def good_code(response):
    '''
    Report whether a page request succeeded

    Input
    -----
    response: page request (any object exposing a status_code attribute)

    Return
    ------
    True when the status code equals 200, False for any other code
    '''

    ok_status = 200
    return ok_status == response.status_code
    

def scrape_kates(start_url, all_reviews=None, page=0):
    '''
    Scrape every page of reviews for a Yelp business

    Requests start_url, reads the total number of review pages from the
    pagination section, requests each remaining page (10 reviews per page,
    selected with the "start" query parameter) and passes every page to
    scrape_page.

    Input
    -----
    start_url: URL of the first page of reviews
    all_reviews: dictionary of reviews that have been processed; a new
                 empty dictionary is used when omitted
    page: integer page number given to the first page (controls the
          numbering of the "Review <n>" keys)

    Return
    ------
    all_reviews with every scraped review added, or the string "Error"
    if any page request does not return status code 200
    '''

    # a {} default would be created once and silently shared between calls
    if all_reviews is None:
        all_reviews = {}

    # access start page
    response = requests.get(start_url)
    if not good_code(response):
        return "Error"

    # put html through text parser
    soup = bs4.BeautifulSoup(response.text, "html5lib")

    # get total number of pages of reviews
    # (the pagination text is expected to end in "of <total>", e.g. "1 of 13")
    page_nav = soup.find("div", {"aria-label":"Pagination navigation", "role": "navigation"})
    page_nums = page_nav.find("span", {"class": "css-chan6m"}).text
    total_pages = int(page_nums.split("of ")[1])

    # later pages are the same URL with a "start" offset, so build them from
    # start_url rather than from one hard-coded business
    separator = "&" if "?" in start_url else "?"

    soup_pages = [soup]
    for page_index in range(1, total_pages):
        link = start_url + separator + "start=" + str(10 * page_index)
        next_response = requests.get(link)
        if not good_code(next_response):
            return "Error"
        # put html through text parser
        soup_pages.append(bs4.BeautifulSoup(next_response.text, "html5lib"))

    print(len(soup_pages))

    # number the pages consecutively, starting from the requested page number
    for offset, page_soup in enumerate(soup_pages):
        all_reviews = scrape_page(page_soup, all_reviews, page + offset)

    return all_reviews

In [3]:
# scrape all review pages, starting from the business's main Yelp page
review_data = scrape_kates(
    "https://www.yelp.com/biz/kates-fried-seafood-and-ice-cream-brewster-2",
    all_reviews={},
    page=0,
)

13


In [9]:
# inspect the first scraped review; keys are "Review <n>", numbered from 1
review_data["Review 1"]

{'reviewer_name': 'Ann P.',
 'reviewer_loc': 'South Dennis, MA',
 'review_date': '6/12/2023',
 'star_rating': '5 star rating',
 'review_text': "A group of us enjoyed lunch today. Service was flawless and the food, amazing. I love fish sandwiches and Kate's version is wonderful. The setting is perfect for outside dining. There are places you go that feel like home. Kate's is one of them with a welcoming vibe and tasty food."}

## Build DataFrame & Export Data to Excel

Convert the scraped review data to a pandas DataFrame and export it to Excel.

In [5]:
import pandas as pd

In [6]:
# build the table and transpose it so each "Review <n>" key becomes a row
review_df = pd.DataFrame(review_data).T

In [7]:
# display the review table: one row per review, one column per scraped field
review_df

Unnamed: 0,reviewer_name,reviewer_loc,review_date,star_rating,review_text
Review 1,Ann P.,"South Dennis, MA",6/12/2023,5 star rating,A group of us enjoyed lunch today. Service was...
Review 2,K S.,"Newtown, CT",8/7/2023,1 star rating,Are you kidding me? Here we go with no real m...
Review 3,David S.,"San Francisco, CA",8/30/2022,4 star rating,Was back at Kate's for the 2nd time this summe...
Review 4,Lori G.,"Maricopa, AZ",6/7/2022,5 star rating,Kate's is open again this summer and they've u...
Review 5,Brittany K.,"San Francisco, CA",6/22/2021,5 star rating,"The clam strips were fantastic, Cape Cod Reube..."
...,...,...,...,...,...
Review 120,Jan A.,"South Salem, NY",9/14/2008,4 star rating,Window service. I have only every had the ice ...
Review 121,K. L.,"Trumbull, CT",8/19/2013,3 star rating,This place is OK but not the best ice cream. O...
Review 122,Brooke K.,"Woburn, MA",5/14/2012,5 star rating,"When my husband, who's been coming here for ye..."
Review 123,Tom H.,"Braintree, MA",9/1/2013,5 star rating,Hidden gem. Drive past this place for 10 year...


In [8]:
# Export is disabled by default; uncomment the next line to write the reviews to KatesReviews.xlsx
# review_df.to_excel("KatesReviews.xlsx", sheet_name='Sheet1')