In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
import time

In [2]:
from config import nyt_api_key

## Create DataFrame of Target Restaurants Reviewed by NY Times

In [3]:
review_url = 'https://www.nytimes.com/reviews/dining'

### Scrape HTML from NYT Dining Page

In [4]:
def create_nyt_soup_object(url):
    
    # Selenium Driver
    dr = webdriver.Chrome()
    dr.get(url)
    WebDriverWait(dr, 100)
    
    # 'Show More' button needs to be 'pressed'
    button = dr.find_element_by_xpath("//button[.='Show More']")

    while True:

        try:

            # Click button
            button.click()
            time.sleep(5)

        # Exception raised when end of reviews is reached
        except StaleElementReferenceException:

            break

    soup = BeautifulSoup(dr.page_source, 'html.parser')

    dr.close()
    dr.quit()
    
    return soup
    
nyt_soup_object = create_nyt_soup_object(review_url)

### Scrape HTML for Restaurant, Review, and URLs and create Dictionary

In [166]:
def nyt_page_scrape(soup_object):

    review_dict = []

    for reviewed in soup_object.find_all('span', itemprop='reviewRating'):
        
        r_name = reviewed.find_parent('div').find('h2').get_text()
        r_rating = reviewed.find('span').get_text()
        r_link_1 = reviewed.find_parent('div').find_parent('div').find('a', href=True)['href']
        r_link_2 = reviewed.find_parent('div').find_parent('article').find('footer').find('a', href=True)['href']
        r_cuisine = reviewed.find_parent('div').find('li', itemprop='servesCuisine').get_text()
        
        try:
            r_neighborhood = reviewed.find_parent('div').find('li', itemprop='addressLocality').get_text()
            
        except AttributeError:
            r_neighborhood = 'NaN'
            
        r_reviewer = reviewed.find_parent('div').find('p', itemprop='author').find('span').get_text()
        r_review_date = reviewed.find_parent('div').find_parent('article').find('time').get_text()

        print(r_name, ' --> ', r_rating, ', scraped')

        restaurant_dict = {'name': r_name,
                           'rating': r_rating,
                           'reviewer': r_reviewer,
                           'review_date': r_review_date,
                           'neighborhood': r_neighborhood,
                           'cuisine': r_cuisine,
                           'review_link_1': r_link_1,
                           'review_link_2': r_link_2}

        review_dict.append(restaurant_dict)
    
    return review_dict

reviews = nyt_page_scrape(nyt_soup_object)

Hanon  -->  2 star , scraped
Del Posto  -->  3 star , scraped
The Freakin Rican  -->  1 star , scraped
Wayan  -->  2 star , scraped
Niche  -->  1 star , scraped
Haenyeo  -->  2 star , scraped
Standard Grill  -->  2 star , scraped
Violet  -->  1 star , scraped
Odo  -->  3 star , scraped
Cka Ka Qellu  -->  2 star , scraped
Madame Vo BBQ  -->  1 star , scraped
Oxalis  -->  1 star , scraped
Bistro Pierre Lapin  -->  1 star , scraped
Cherry Point  -->  2 star , scraped
Benno  -->  3 star , scraped
Bang Bar  -->  1 star , scraped
Hwaban  -->  2 star , scraped
Bluebird London  -->  0.5 star , scraped
The Four Seasons Restaurant  -->  1 star , scraped
Saint Julivert Fisherie  -->  1 star , scraped
Adda Indian Canteen  -->  2 star , scraped
Misi  -->  3 star , scraped
Mama’s Too  -->  1 star , scraped
Henry at Life Hotel by JJ  -->  1 star , scraped
Hunan Slurp  -->  2 star , scraped
Atomix  -->  3 star , scraped
Manhatta  -->  1 star , scraped
Village Cafe  -->  2 star , scraped
Kopitiam  --> 

Kefi  -->  1 star , scraped
L'Artusi  -->  1 star , scraped
Buttermilk Channel  -->  1 star , scraped
Corton  -->  3 star , scraped
Market Table  -->  2 star , scraped
Bobo  -->  1 star , scraped
Kanoyama  -->  1 star , scraped
Sushi Azabu  -->  1 star , scraped
Candle 79  -->  1 star , scraped
Socarrat Paella Bar  -->  1 star , scraped
Delicatessen  -->  1 star , scraped
James  -->  1 star , scraped
Michael's  -->  0.75 star , scraped
Perbacco  -->  2 star , scraped
Scarpetta  -->  3 star , scraped
Szechuan Gourmet  -->  2 star , scraped
Gottino  -->  1 star , scraped
Terroir  -->  1 star , scraped
Artisanal  -->  2 star , scraped
Commerce  -->  1 star , scraped
La Sirène  -->  1 star , scraped
Bar Boulud  -->  2 star , scraped
Second Avenue Deli  -->  1 star , scraped
Ilili  -->  1 star , scraped
Blue Ribbon Sushi Bar and Grill  -->  2 star , scraped
Barbuto  -->  1 star , scraped
Harry Cipriani  -->  0.25 star , scraped
Moim  -->  1 star , scraped
Gemma  -->  1 star , scraped
Peter 

In [265]:
reviews

[{'name': 'Hanon',
  'rating': '2 star',
  'reviewer': 'Pete Wells',
  'review_date': 'May 21, 2019',
  'neighborhood': 'Williamsburg',
  'cuisine': 'Japanese',
  'review_link_1': 'https://www.nytimes.com/2019/05/21/dining/hanon-review.html',
  'review_link_2': 'https://www.nytimes.com/2019/05/21/dining/hanon-review.html?rref=collection%2Fcollection%2Frestaurant-guide'},
 {'name': 'Del Posto',
  'rating': '3 star',
  'reviewer': 'Pete Wells',
  'review_date': 'May 14, 2019',
  'neighborhood': 'Chelsea',
  'cuisine': 'Italian',
  'review_link_1': 'https://www.nytimes.com/2019/05/14/dining/del-posto-review-pete-wells.html',
  'review_link_2': 'https://www.nytimes.com/2019/05/14/dining/del-posto-review-pete-wells.html?rref=collection%2Fcollection%2Frestaurant-guide'},
 {'name': 'The Freakin Rican',
  'rating': '1 star',
  'reviewer': 'Pete Wells',
  'review_date': 'May 7, 2019',
  'neighborhood': 'Astoria',
  'cuisine': 'Caribbean, Latin American',
  'review_link_1': 'https://www.nytimes.

In [5]:
len(reviews)

NameError: name 'reviews' is not defined

In [267]:
reviews[100]

{'name': 'Cut by Wolfgang Puck',
 'rating': '1 star',
 'reviewer': 'Pete Wells',
 'review_date': 'Dec. 20, 2016',
 'neighborhood': 'Financial District',
 'cuisine': 'American, Steak Houses',
 'review_link_1': 'https://www.nytimes.com/2016/12/20/dining/cut-by-wolfgang-puck-review.html',
 'review_link_2': 'https://www.nytimes.com/2016/12/20/dining/cut-by-wolfgang-puck-review.html?rref=collection%2Fcollection%2Frestaurant-guide'}

In [276]:
pd.set_option('max_colwidth',100)
df = pd.DataFrame(reviews, columns=col_order)
df['review_link_1']

0                                            https://www.nytimes.com/2019/05/21/dining/hanon-review.html
1                             https://www.nytimes.com/2019/05/14/dining/del-posto-review-pete-wells.html
2                     https://www.nytimes.com/2019/05/07/dining/the-freakin-rican-restaurant-review.html
3                                 https://www.nytimes.com/2019/04/23/dining/wayan-restaurant-review.html
4                                    https://www.nytimes.com/2019/04/16/dining/niche-review-mazemen.html
5                    https://www.nytimes.com/2019/04/09/dining/haenyeo-restaurant-review-jenny-kwak.html
6                                   https://www.nytimes.com/2019/04/02/dining/standard-grill-review.html
7                                     https://www.nytimes.com/2019/03/26/dining/violet-pizza-review.html
8                                   https://www.nytimes.com/2019/03/19/dining/odo-restaurant-review.html
9                                     https://www.nytim

In [268]:
col_order = ['name', 'rating', 'review_date', 'reviewer', 'neighborhood', 'cuisine', 'review_link_1', 'review_link_2']

In [273]:
df = df[col_order]

In [13]:
#df.to_csv('reviews.csv', encoding='utf-8')

In [40]:
df.groupby('reviewer')['rating'].value_counts()

reviewer            rating   
Amanda Hesser       2 star         3
                    0.75 star      1
                    1 star         1
Bryan Miller        1 star         5
                    2 star         4
Eric Asimov         1 star         6
                    2 star         4
                    3 star         1
Frank Bruni         1 star        59
                    2 star        36
                    3 star         9
                    0.75 star      7
                    0.25 star      2
Frank J. Prial      2 star         1
Julia Moskin        2 star         1
Ligaya Mishan       1 star         1
                    2 star         1
Marian Burros       2 star         3
                    0.5 star       2
                    1 star         1
Mimi Sheraton       1 star         1
Pete Wells          2 star       131
                    1 star        93
                    3 star        39
                    0.75 star      9
                    0.5 star       5
        

In [15]:
len(df[df['reviewer'] == 'Pete Wells']['name'].unique())

282

### Scraping Restaurant Reviews Page for URLs

In [7]:
reviews_only_url = 'https://www.nytimes.com/column/restaurant-review'

In [8]:
nyt_soup_object = create_nyt_soup_object(reviews_only_url)

In [10]:
url_base = 'https://www.nytimes.com'
review_urls = []

for reviews in nyt_soup_object.find_all('div', class_='css-13mho3u'):

    for link in reviews.find_all('a', href=True):
        
        r_url = link['href']
        final_url = url_base + r_url
        review_urls.append(final_url)        

In [11]:
len(review_urls)

988

In [12]:
review_urls = set(review_urls)
len(review_urls)

985

In [13]:
df_review_urls = pd.DataFrame(review_urls)

In [14]:
for review_url in review_urls:
    print(review_url)

https://www.nytimes.com/2010/07/28/dining/reviews/28rest.html
https://www.nytimes.com/1999/08/11/dining/restaurants-a-jewel-box-in-a-town-house.html
https://www.nytimes.com/2017/01/31/dining/chumleys-review-bar-west-village.html
https://www.nytimes.com/2010/11/03/dining/reviews/03rest.html
https://www.nytimes.com/2001/02/28/dining/restaurants-a-simple-equation-for-the-financial-district.html
https://www.nytimes.com/2001/09/05/dining/restaurants-parisian-elegance-on-the-upper-east-side.html
https://www.nytimes.com/2016/02/24/dining/little-pepper-review.html
https://www.nytimes.com/2017/04/04/dining/babbo-review-pete-wells.html
https://www.nytimes.com/1999/02/24/dining/restaurants-american-food-indian-spices.html
https://www.nytimes.com/2018/10/02/dining/village-cafe-review-brooklyn.html
https://www.nytimes.com/2000/08/30/dining/restaurants-gazetteer-of-italian-fare-with-eccentricity.html
https://www.nytimes.com/2010/10/13/dining/13rest.html
https://www.nytimes.com/2009/05/20/dining/revi

In [195]:
#df_review_urls.to_csv('reviews_urls_raw.csv', encoding='utf-8')

In [17]:
review_urls = list(review_urls)

In [18]:
review_urls

['https://www.nytimes.com/2010/07/28/dining/reviews/28rest.html',
 'https://www.nytimes.com/1999/08/11/dining/restaurants-a-jewel-box-in-a-town-house.html',
 'https://www.nytimes.com/2017/01/31/dining/chumleys-review-bar-west-village.html',
 'https://www.nytimes.com/2010/11/03/dining/reviews/03rest.html',
 'https://www.nytimes.com/2001/02/28/dining/restaurants-a-simple-equation-for-the-financial-district.html',
 'https://www.nytimes.com/2001/09/05/dining/restaurants-parisian-elegance-on-the-upper-east-side.html',
 'https://www.nytimes.com/2016/02/24/dining/little-pepper-review.html',
 'https://www.nytimes.com/2017/04/04/dining/babbo-review-pete-wells.html',
 'https://www.nytimes.com/1999/02/24/dining/restaurants-american-food-indian-spices.html',
 'https://www.nytimes.com/2018/10/02/dining/village-cafe-review-brooklyn.html',
 'https://www.nytimes.com/2000/08/30/dining/restaurants-gazetteer-of-italian-fare-with-eccentricity.html',
 'https://www.nytimes.com/2010/10/13/dining/13rest.html'

In [82]:
len(review_urls)

985

In [102]:
test_url = ['https://www.nytimes.com/2009/03/25/dining/reviews/25rest.html']

In [165]:
def parse_urls(urls):
    
    list_of_dicts = []
    error_dict = []
    
    recent_count = 0
    modern_count = 0
    archive_count = 0
    
    for review_url in urls:
        
        try:
            time.sleep(2)

            # GET HTML
            r = requests.get(review_url)
            c = r.content

            # CREATE BeautifulSoup Object
            soup = BeautifulSoup(c, 'html.parser')

            # "Check" object: If this object exists, the NYT Review is Recent
            check_new = soup.find('article').find('div', class_='css-53u6y8')

            # "Check" object: If this object exists, the NYT Review is an Archive
            try:
                check_archived = soup.find('span', class_='kicker-label').find('a', text='Archives')
            except:
                pass
            
            # Check object: If this object exists, the NYT Review is an annoying, 'Modern' format
            check_broken_modern = soup.find('aside', class_='review-details restaurant-details')
            
            # Check if Recent
            if check_new == None:

                # If Not Recent, Check if Archived
                if check_archived == None:
                    
                    # If not Archived, check if Broken Modern
                    if check_broken_modern == None:
                        print('Broken Modern Review: ', review_url)
                    
                    else:
                        
                        print('Modern Review: ', review_url)
                        list_of_dicts.append(parse_modern_reviews(soup))
                        modern_count += 1

                # If Archived:
                else:
                    print('Archived Review: ', review_url)
                    archive_count += 1

            # If Recent
            else:
                print('Recent Review: ', review_url)
                list_of_dicts.append(parse_recent_reviews(soup))
                recent_count += 1
            
        except:
            print('***Error: ', review_url)
            error_dict.append(review_url)
            
    return list_of_dicts, len(error_dict), error_dict, recent_count, modern_count, archive_count
    
review_dict, len_errors, error_dict, recent_count, modern_count, archive_count = parse_urls(review_urls[0: 100])

Modern Review:  https://www.nytimes.com/2010/07/28/dining/reviews/28rest.html
Archived Review:  https://www.nytimes.com/1999/08/11/dining/restaurants-a-jewel-box-in-a-town-house.html
Recent Review:  https://www.nytimes.com/2017/01/31/dining/chumleys-review-bar-west-village.html
Modern Review:  https://www.nytimes.com/2010/11/03/dining/reviews/03rest.html
Archived Review:  https://www.nytimes.com/2001/02/28/dining/restaurants-a-simple-equation-for-the-financial-district.html
Archived Review:  https://www.nytimes.com/2001/09/05/dining/restaurants-parisian-elegance-on-the-upper-east-side.html
Recent Review:  https://www.nytimes.com/2016/02/24/dining/little-pepper-review.html
Recent Review:  https://www.nytimes.com/2017/04/04/dining/babbo-review-pete-wells.html
Archived Review:  https://www.nytimes.com/1999/02/24/dining/restaurants-american-food-indian-spices.html
Recent Review:  https://www.nytimes.com/2018/10/02/dining/village-cafe-review-brooklyn.html
Archived Review:  https://www.nytim

Modern Review:  https://www.nytimes.com/2009/10/28/dining/reviews/28rest.html
Modern Review:  https://www.nytimes.com/2008/12/03/dining/reviews/03rest.html
Modern Review:  https://www.nytimes.com/2011/03/23/dining/reviews/23rest.html
Modern Review:  https://www.nytimes.com/2012/08/01/dining/reviews/reynard-in-williamsburg-brooklyn-restaurant-review.html
Recent Review:  https://www.nytimes.com/2018/02/06/dining/grant-achatz-office-aviary-review.html
Modern Review:  https://www.nytimes.com/2010/03/10/dining/reviews/10Rest.html
Archived Review:  https://www.nytimes.com/2000/01/19/dining/restaurants-vigorous-fare-in-an-underground-lair.html
Archived Review:  https://www.nytimes.com/2005/01/12/dining/fish-thats-raw-but-never-undressed.html
Archived Review:  https://www.nytimes.com/2001/07/25/dining/restaurants-just-swashbuckle-in-and-brandish-a-fork.html
Modern Review:  https://www.nytimes.com/2010/01/20/dining/reviews/20rest.html
Modern Review:  https://www.nytimes.com/2008/04/16/dining/16

In [169]:
review_dict, len_errors, error_dict, recent_count, modern_count, archive_count = parse_urls(review_urls[100: 200])

Modern Review:  https://www.nytimes.com/2011/11/30/dining/reviews/fatty-cue-nyc-restaurant-review.html
Modern Review:  https://www.nytimes.com/2009/12/09/dining/reviews/09rest.html
Archived Review:  https://www.nytimes.com/2006/01/18/dining/reviews/one-reason-to-stop-resenting-mondays.html
Archived Review:  https://www.nytimes.com/2001/09/19/dining/restaurants-an-alliance-of-sun-dappled-cuisines.html
Archived Review:  https://www.nytimes.com/1999/10/27/dining/restaurants-the-neighbor-you-know.html
Modern Review:  https://www.nytimes.com/2009/05/27/dining/reviews/27rest.html
Archived Review:  https://www.nytimes.com/2006/09/13/dining/reviews/a-welcoming-place-to-raise-a-glass.html
Modern Review:  https://www.nytimes.com/2012/08/22/dining/reviews/la-vara-in-cobble-hill-brooklyn-restaurant-review.html
Recent Review:  https://www.nytimes.com/2015/12/23/dining/wassail-review.html
Recent Review:  https://www.nytimes.com/2015/11/11/dining/jams-jonathan-waxman-review.html
***Error:  https://ww

Archived Review:  https://www.nytimes.com/2002/10/30/dining/restaurants-from-an-indian-chef-tandoori-fare-and-wild-cards.html
Recent Review:  https://www.nytimes.com/2015/05/27/dining/restaurant-review-aquavit-in-midtown.html
Archived Review:  https://www.nytimes.com/2006/08/02/dining/reviews/food-youd-almost-rather-hug-than-eat.html
Modern Review:  https://www.nytimes.com/2009/02/11/dining/reviews/11rest.html
Recent Review:  https://www.nytimes.com/2016/03/16/dining/insa-restaurant-review.html
Recent Review:  https://www.nytimes.com/2019/02/26/dining/oxalis-restaurant-review.html
Recent Review:  https://www.nytimes.com/2018/06/26/dining/the-islands-review.html
Archived Review:  https://www.nytimes.com/2006/10/25/dining/reviews/for-bond-traders-and-other-carnivores.html
Archived Review:  https://www.nytimes.com/2006/07/12/dining/reviews/steaks-with-lots-of-asides.html
Modern Review:  https://www.nytimes.com/2011/02/02/dining/reviews/02rest.html
Archived Review:  https://www.nytimes.com

In [166]:
nan_var = float('NaN')

def parse_recent_reviews(soup):
    
    # Extract review text
    article = []
        
    for p in soup.find_all('p', class_='css-18icg9x evys1bk0'):
            
        article.append(p.get_text())

    article = ' '.join(article)

    # Extract Restaurant Name, Stars + Neighborhood
    boa = soup.find('div', {'class': 'bottom-of-article'})

    # Restaurant Name
    name = boa.find('h4').get_text()

    # Restaurant Rating
    try:
        rating = boa.find('span', {'class': ['css-z4hz5', 'css-1y5uc8z']}).get_text()
    except:
        rating = boa.find('div', {'class': 'css-1y5uc8z'}).find('span').get_text()
        
    # Restaurant Neighborhood
    hood = boa.find('dd', class_='neighborhood').get_text()

    # Critic's Pick?
    if boa.find('span', {'class': 'css-14dcre2'}):
        cpick = True
    else:
        cpick = False

    # Restaurant Atmosphere
    try:
        atmosphere = boa.find('div', class_='atmosphere').find('dd').get_text()
    except:
        atmosphere = nan_var

    # Sound
    try:
        sound = boa.find('div', class_='noiseLevel').find('dd').get_text()
    except:
        sound = nan_var

    # Recommendations
    try:
        recs = boa.find('div', class_='recommendedDishes').find('dd').get_text()
    except:
        recs = nan_var

    # Menu Link
    try:
        menu_link = boa.find('div', class_='menuLink').find('a', href=True)['href']
    except:
        menu_link = nan_var

    # Drinks
    try:
        drinks = boa.find('div', class_='alcoholInfo').find('dd').get_text()
    except:
        drinks = nan_var

    # Price
    try:
        price = boa.find('dd', class_='price').get_text()
    except:
        price = nan_var

    # Hours
    try:
        hours = boa.find('dd', class_='hours').get_text()
    except:
        hours = nan_var

    # Reservations
    try:
        resis = boa.find('dd', class_='reservations').get_text()
    except:
        resis = nan_var

    # URL
    try:
        url = soup.find('meta', {'property':'og:url'}).get('content')
    except:
        url = nan_var
            
    # Extract Meta Tags - Reviewer, Date, Keywords, Article_ID
    for tag in soup.find_all('meta'):

        if tag.get('name', None) == 'byl':
            reviewer = tag.get('content', None).replace('By ', '').strip()

        if tag.get('name', None) == 'pdate':
            rev_date = tag.get('content', None).strip()

        if tag.get('name', None) == 'news_keywords':
            keywords = tag.get('content', None).strip()

        if tag.get('name', None) == 'articleid':
            article_id = tag.get('content', None).strip()
            
    rev_dict = {'name': name,
                'review_url': url,
                'review_date': rev_date, 
                'reviewer': reviewer, 
                'rating': rating, 
                'neighborhood': hood,
                'critic_pick': cpick,
                'atmosphere': atmosphere,
                'sound': sound,
                'recommendations': recs,
                'menu': menu_link,
                'drinks': drinks,
                'price': price,
                'hours': hours,
                'reservations': resis,
                'keywords': keywords,
                'article_id': article_id,
                'review': article}
    
    return rev_dict

In [163]:
def parse_modern_reviews(soup):
    
    for tag in soup.find_all('meta'):
        #Reviewer
        if tag.get('name', None) == 'author':
            reviewer = tag.get('content', None).strip()
        #Review Date
        if tag.get('name', None) == 'pdate':
            rev_date = tag.get('content', None).strip()
        #Keywords
        if tag.get('name', None)== 'news_keywords':
            keywords = tag.get('content', None).strip()
    
    #End of Article summary information:
    EOA = soup.find('aside', class_='review-details restaurant-details')
    
    #restaurant name
    name = EOA.find('h4').get_text()

    #Rating
    if EOA.find('li', class_='critic-star-rating'):
        rating = EOA.find('li', class_='critic-star-rating').get_text()
    elif EOA.find('li', class_='critic-word-rating'):
        rating = EOA.find('li', class_='critic-word-rating').get_text()
    else:
        rating = float('nan')

    #Neighborhood
    hood = EOA.find('p', itemprop='addressLocality').get_text()

    #Critic pick T/F
    if EOA.find('li', class_='critics-pick'):
        cpick = True
    else:
        cpick = False
    
    #Atmosphere
    try:
        atmosphere = EOA.find('span', text='Atmosphere').parent.find('span', itemprop='review').get_text()
    except:
        atmosphere = float('nan')
    
    #Sound
    try:
        sound = EOA.find('span', text='Sound').parent.find('span', itemprop='review').get_text()
    except:
        sound = float('nan')
    
    #Menu Recommendations
    try:
        recs = EOA.find('span', text='Recommended Dishes').parent.find('span', itemprop='menu').get_text()
    except:
        recs = float('nan')
    
    #Menu Link
    try:
        menu_link = EOA.find('span', text='Menu').parent.find('span', itemprop='menu').find('a').get('href')
    except:
        menu_link = float('nan')
    
    #Drinks
    try:
        drinks = EOA.find('span', text='Drinks and Wine').parent.find('span', itemprop='menu').get_text()
    except:
        drinks = float('nan')
    
    #Price
    try:
        price = EOA.find('span', itemprop='priceRange').get_text()
    except:
        price = float('nan')
    
    #Hours
    try:
        hours = EOA.find('time').get('datetime')
    except:
        hours = float('nan')
    
    #Reservations
    try:
        resis = EOA.find('span', itemprop='acceptsReservations').get_text()
    except:
        resis = float('nan')
    
    #Review Text
    review = []
    
    for p in soup.find_all('p', class_='story-body-text story-content'):
        if p.get('data-para-count') == '8':
            break
        else:
            review.append(p.get_text())
            
    article = ' '.join(review)

    #Article ID
    article_id = soup.find('meta', itemprop='identifier').get('content')
    
    #Review URL
    url = soup.find('meta', {'property':'og:url'}).get('content')
    
    rev_dict = {'name': name,
                'review_url': url,
                'review_date': rev_date, 
                'reviewer': reviewer, 
                'rating': rating, 
                'neighborhood': hood,
                'critic_pick': cpick,
                'atmosphere': atmosphere,
                'sound': sound,
                'recommendations': recs,
                'menu': menu_link,
                'drinks': drinks,
                'price': price,
                'hours': hours,
                'reservations': resis,
                'article_id': article_id,
                'keywords': keywords,
                'review': article}
    
    return rev_dict 

In [162]:
def parse_archived_reviews(soup):
    
    # BeautifulSoup object for body text extraction
    pars = soup.find('article').find_all('div', {'class': ['story-body-supplemental', 'css-53u6y8']})

    # Extract body text
    for par in pars:
        
        articles = []
        
        x = par.find_all('p')

        for i in x:
            articles.append(i.get_text())
    
    return articles