In [220]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
import time

In [221]:
from config import nyt_api_key

## Create DataFrame of Target Restaurants Reviewed by NY Times

In [222]:
review_url = 'https://www.nytimes.com/reviews/dining'

### Scrape HTML from NYT Dining Page

In [223]:
def create_nyt_soup_object(url):
    
    # Selenium Driver
    dr = webdriver.Chrome()
    dr.get(url)
    WebDriverWait(dr, 100)
    
    # 'Show More' button needs to be 'pressed'
    button = dr.find_element_by_xpath("//button[.='Show More']")

    while True:

        try:

            # Click button
            button.click()
            time.sleep(5)

        # Exception raised when end of reviews is reached
        except StaleElementReferenceException:

            break

    soup = BeautifulSoup(dr.page_source, 'html.parser')

    dr.close()
    dr.quit()
    
    return soup

### Scrape HTML for Restaurant, Review, and URLs and create Dictionary

In [166]:
def nyt_page_scrape(soup_object):

    review_dict = []

    for reviewed in soup_object.find_all('span', itemprop='reviewRating'):
        
        r_name = reviewed.find_parent('div').find('h2').get_text()
        r_rating = reviewed.find('span').get_text()
        r_link_1 = reviewed.find_parent('div').find_parent('div').find('a', href=True)['href']
        r_link_2 = reviewed.find_parent('div').find_parent('article').find('footer').find('a', href=True)['href']
        r_cuisine = reviewed.find_parent('div').find('li', itemprop='servesCuisine').get_text()
        
        try:
            r_neighborhood = reviewed.find_parent('div').find('li', itemprop='addressLocality').get_text()
            
        except AttributeError:
            r_neighborhood = 'NaN'
            
        r_reviewer = reviewed.find_parent('div').find('p', itemprop='author').find('span').get_text()
        r_review_date = reviewed.find_parent('div').find_parent('article').find('time').get_text()

        print(r_name, ' --> ', r_rating, ', scraped')

        restaurant_dict = {'name': r_name,
                           'rating': r_rating,
                           'reviewer': r_reviewer,
                           'review_date': r_review_date,
                           'neighborhood': r_neighborhood,
                           'cuisine': r_cuisine,
                           'review_link_1': r_link_1,
                           'review_link_2': r_link_2}

        review_dict.append(restaurant_dict)
    
    return review_dict

reviews = nyt_page_scrape(nyt_soup_object)

Hanon  -->  2 star , scraped
Del Posto  -->  3 star , scraped
The Freakin Rican  -->  1 star , scraped
Wayan  -->  2 star , scraped
Niche  -->  1 star , scraped
Haenyeo  -->  2 star , scraped
Standard Grill  -->  2 star , scraped
Violet  -->  1 star , scraped
Odo  -->  3 star , scraped
Cka Ka Qellu  -->  2 star , scraped
Madame Vo BBQ  -->  1 star , scraped
Oxalis  -->  1 star , scraped
Bistro Pierre Lapin  -->  1 star , scraped
Cherry Point  -->  2 star , scraped
Benno  -->  3 star , scraped
Bang Bar  -->  1 star , scraped
Hwaban  -->  2 star , scraped
Bluebird London  -->  0.5 star , scraped
The Four Seasons Restaurant  -->  1 star , scraped
Saint Julivert Fisherie  -->  1 star , scraped
Adda Indian Canteen  -->  2 star , scraped
Misi  -->  3 star , scraped
Mama’s Too  -->  1 star , scraped
Henry at Life Hotel by JJ  -->  1 star , scraped
Hunan Slurp  -->  2 star , scraped
Atomix  -->  3 star , scraped
Manhatta  -->  1 star , scraped
Village Cafe  -->  2 star , scraped
Kopitiam  --> 

Kefi  -->  1 star , scraped
L'Artusi  -->  1 star , scraped
Buttermilk Channel  -->  1 star , scraped
Corton  -->  3 star , scraped
Market Table  -->  2 star , scraped
Bobo  -->  1 star , scraped
Kanoyama  -->  1 star , scraped
Sushi Azabu  -->  1 star , scraped
Candle 79  -->  1 star , scraped
Socarrat Paella Bar  -->  1 star , scraped
Delicatessen  -->  1 star , scraped
James  -->  1 star , scraped
Michael's  -->  0.75 star , scraped
Perbacco  -->  2 star , scraped
Scarpetta  -->  3 star , scraped
Szechuan Gourmet  -->  2 star , scraped
Gottino  -->  1 star , scraped
Terroir  -->  1 star , scraped
Artisanal  -->  2 star , scraped
Commerce  -->  1 star , scraped
La Sirène  -->  1 star , scraped
Bar Boulud  -->  2 star , scraped
Second Avenue Deli  -->  1 star , scraped
Ilili  -->  1 star , scraped
Blue Ribbon Sushi Bar and Grill  -->  2 star , scraped
Barbuto  -->  1 star , scraped
Harry Cipriani  -->  0.25 star , scraped
Moim  -->  1 star , scraped
Gemma  -->  1 star , scraped
Peter 

In [265]:
reviews

[{'name': 'Hanon',
  'rating': '2 star',
  'reviewer': 'Pete Wells',
  'review_date': 'May 21, 2019',
  'neighborhood': 'Williamsburg',
  'cuisine': 'Japanese',
  'review_link_1': 'https://www.nytimes.com/2019/05/21/dining/hanon-review.html',
  'review_link_2': 'https://www.nytimes.com/2019/05/21/dining/hanon-review.html?rref=collection%2Fcollection%2Frestaurant-guide'},
 {'name': 'Del Posto',
  'rating': '3 star',
  'reviewer': 'Pete Wells',
  'review_date': 'May 14, 2019',
  'neighborhood': 'Chelsea',
  'cuisine': 'Italian',
  'review_link_1': 'https://www.nytimes.com/2019/05/14/dining/del-posto-review-pete-wells.html',
  'review_link_2': 'https://www.nytimes.com/2019/05/14/dining/del-posto-review-pete-wells.html?rref=collection%2Fcollection%2Frestaurant-guide'},
 {'name': 'The Freakin Rican',
  'rating': '1 star',
  'reviewer': 'Pete Wells',
  'review_date': 'May 7, 2019',
  'neighborhood': 'Astoria',
  'cuisine': 'Caribbean, Latin American',
  'review_link_1': 'https://www.nytimes.

In [268]:
col_order = ['name', 'rating', 'review_date', 'reviewer', 'neighborhood', 'cuisine', 'review_link_1', 'review_link_2']

In [273]:
df = df[col_order]

In [13]:
#df.to_csv('reviews.csv', encoding='utf-8')

### Scraping Restaurant Reviews Page for URLs

In [7]:
reviews_only_url = 'https://www.nytimes.com/column/restaurant-review'

In [224]:
#nyt_soup_object = create_nyt_soup_object(reviews_only_url)

In [3]:
url_base = 'https://www.nytimes.com'
review_urls = []

for reviews in nyt_soup_object.find_all('div', class_='css-13mho3u'):

    for link in reviews.find_all('a', href=True):
        
        r_url = link['href']
        final_url = url_base + r_url
        review_urls.append(final_url)        

NameError: name 'nyt_soup_object' is not defined

In [232]:
len(review_urls)

986

In [13]:
review_urls = set(review_urls)
len(review_urls)

986

In [14]:
df_review_urls = pd.DataFrame(review_urls)

In [16]:
#df_review_urls.to_csv('reviews_urls_raw.csv', encoding='utf-8')

In [229]:
df_review_links = pd.read_csv('reviews_urls_raw.csv')

In [230]:
review_urls = df_review_links['0']

In [231]:
review_urls = list(review_urls)

In [8]:
test_url = ['https://www.nytimes.com/2009/03/25/dining/reviews/25rest.html']

In [225]:
nan_var = float('NaN')

# RECENT REVIEWS PARSING FUNCTION
def parse_recent_reviews(soup):
    
    bad_str = ' - The New York Times'
    title = soup.find('title').get_text().replace(bad_str,'')
    
    # Extract review text
    article = []
        
    for p in soup.find_all('p', class_='css-18icg9x evys1bk0'):
            
        article.append(p.get_text())

    article = ' '.join(article)

    # Extract Restaurant Name, Stars + Neighborhood
    boa = soup.find('div', {'class': 'bottom-of-article'})

    # Restaurant Name
    name = boa.find('h4').get_text()

    # Restaurant Rating
    try:
        rating = boa.find('span', {'class': ['css-z4hz5', 'css-1y5uc8z']}).get_text()
    except:
        rating = boa.find('div', {'class': 'css-1y5uc8z'}).find('span').get_text()
        
    # Restaurant Neighborhood
    hood = boa.find('dd', class_='neighborhood').get_text()

    # Critic's Pick?
    if boa.find('span', {'class': 'css-14dcre2'}):
        cpick = True
    else:
        cpick = False

    # Restaurant Atmosphere
    try:
        atmosphere = boa.find('div', class_='atmosphere').find('dd').get_text()
    except:
        atmosphere = nan_var

    # Sound
    try:
        sound = boa.find('div', class_='noiseLevel').find('dd').get_text()
    except:
        sound = nan_var

    # Recommendations
    try:
        recs = boa.find('div', class_='recommendedDishes').find('dd').get_text()
    except:
        recs = nan_var

    # Menu Link
    try:
        menu_link = boa.find('div', class_='menuLink').find('a', href=True)['href']
    except:
        menu_link = nan_var

    # Drinks
    try:
        drinks = boa.find('div', class_='alcoholInfo').find('dd').get_text()
    except:
        drinks = nan_var

    # Price
    try:
        price = boa.find('dd', class_='price').get_text()
    except:
        price = nan_var

    # Hours
    try:
        hours = boa.find('dd', class_='hours').get_text()
    except:
        hours = nan_var

    # Reservations
    try:
        resis = boa.find('dd', class_='reservations').get_text()
    except:
        resis = nan_var

    # URL
    try:
        url = soup.find('meta', {'property':'og:url'}).get('content')
    except:
        url = nan_var
            
    # Extract Meta Tags - Reviewer, Date, Keywords, Article_ID
    for tag in soup.find_all('meta'):

        if tag.get('name', None) == 'byl':
            reviewer = tag.get('content', None).replace('By ', '').strip()

        if tag.get('name', None) == 'pdate':
            rev_date = tag.get('content', None).strip()

        if tag.get('name', None) == 'news_keywords':
            keywords = tag.get('content', None).strip()

        if tag.get('name', None) == 'articleid':
            article_id = tag.get('content', None).strip()
            
        
    rev_dict = {'name': name,
                'title': title,
                'review_url': url,
                'review_date': rev_date, 
                'reviewer': reviewer, 
                'rating': rating, 
                'neighborhood': hood,
                'critic_pick': cpick,
                'atmosphere': atmosphere,
                'sound': sound,
                'recommendations': recs,
                'menu': menu_link,
                'drinks': drinks,
                'price': price,
                'hours': hours,
                'reservations': resis,
                'keywords': keywords,
                'article_id': article_id,
                'review': article}
    
    return rev_dict

In [226]:
def parse_modern_reviews(soup):
    
    # Title
    bad_str = ' - The New York Times'
    title = soup.find('title').get_text().replace(bad_str,'')
    
    for tag in soup.find_all('meta'):
        #Reviewer
        if tag.get('name', None) == 'author':
            reviewer = tag.get('content', None).strip()
        #Review Date
        if tag.get('name', None) == 'pdate':
            rev_date = tag.get('content', None).strip()
        #Keywords
        if tag.get('name', None)== 'news_keywords':
            keywords = tag.get('content', None).strip()
    
    #End of Article summary information:
    EOA = soup.find('aside', class_='review-details restaurant-details')
    
    #restaurant name
    name = EOA.find('h4').get_text()

    #Rating
    if EOA.find('li', class_='critic-star-rating'):
        rating = EOA.find('li', class_='critic-star-rating').get_text()
    elif EOA.find('li', class_='critic-word-rating'):
        rating = EOA.find('li', class_='critic-word-rating').get_text()
    else:
        rating = float('nan')

    #Neighborhood
    hood = EOA.find('p', itemprop='addressLocality').get_text()

    #Critic pick T/F
    if EOA.find('li', class_='critics-pick'):
        cpick = True
    else:
        cpick = False
    
    #Atmosphere
    try:
        atmosphere = EOA.find('span', text='Atmosphere').parent.find('span', itemprop='review').get_text()
    except:
        atmosphere = float('nan')
    
    #Sound
    try:
        sound = EOA.find('span', text='Sound').parent.find('span', itemprop='review').get_text()
    except:
        sound = float('nan')
    
    #Menu Recommendations
    try:
        recs = EOA.find('span', text='Recommended Dishes').parent.find('span', itemprop='menu').get_text()
    except:
        recs = float('nan')
    
    #Menu Link
    try:
        menu_link = EOA.find('span', text='Menu').parent.find('span', itemprop='menu').find('a').get('href')
    except:
        menu_link = float('nan')
    
    #Drinks
    try:
        drinks = EOA.find('span', text='Drinks and Wine').parent.find('span', itemprop='menu').get_text()
    except:
        drinks = float('nan')
    
    #Price
    try:
        price = EOA.find('span', itemprop='priceRange').get_text()
    except:
        price = float('nan')
    
    #Hours
    try:
        hours = EOA.find('time').get('datetime')
    except:
        hours = float('nan')
    
    #Reservations
    try:
        resis = EOA.find('span', itemprop='acceptsReservations').get_text()
    except:
        resis = float('nan')
    
    #Review Text
    review = []
    
    for p in soup.find_all('p', class_='story-body-text story-content'):
        if p.get('data-para-count') == '8':
            break
        else:
            review.append(p.get_text())
            
    article = ' '.join(review)

    #Article ID
    article_id = soup.find('meta', itemprop='identifier').get('content')
    
    #Review URL
    url = soup.find('meta', {'property':'og:url'}).get('content')
    
    rev_dict = {'name': name,
                'title': title,
                'review_url': url,
                'review_date': rev_date, 
                'reviewer': reviewer, 
                'rating': rating, 
                'neighborhood': hood,
                'critic_pick': cpick,
                'atmosphere': atmosphere,
                'sound': sound,
                'recommendations': recs,
                'menu': menu_link,
                'drinks': drinks,
                'price': price,
                'hours': hours,
                'reservations': resis,
                'article_id': article_id,
                'keywords': keywords,
                'review': article}
    
    return rev_dict 

In [227]:
def parse_archived_reviews(soup):
    
    # Title
    bad_str = ' - The New York Times'
    title = soup.find('title').get_text().replace(bad_str,'')
    
    #Reviewer
    try:
        reviewer = soup.find('meta',{'name':'author'}).get('content')
    except:
        reviewer = float('nan')
    
    #Review Date
    try:
        rev_date = soup.find('meta',{'name':'pdate'}).get('content')
    except:
        rev_date = float('nan')
    
    #Keywords
    try:
        keywords = soup.find('meta',{'name':'news_keywords'}).get('content')
    except:
        keywords = float('nan')
        
    #Article ID
    try:
        article_id = soup.find('meta', itemprop='identifier').get('content')
    except:
        article_id = float('nan')
        
    #URL
    try: 
        url = soup.find('meta', {'property':'og:url'}).get('content')
    except:
        url = float('nan')
    
    
    #restaurant name & rating
    regex = re.compile("([a-zA-Z0-9_ ]+)(?:[ \n]+\*+ \[Rating: )([a-zA-Z0-9_ ]+)] [a-zA-Z0-9_ \('\)]*[, ]{0,2}([a-zA-Z0-9_ ,]*);")
    regex2 = re.compile("([a-zA-Z0-9_ .']+) ([A-Z]{4,12}) [a-zA-Z0-9_ \('\)]+, ([a-zA-Z0-9_ ,]+);")
    if regex.search(str(soup)):
        name = regex.search(str(soup))[1].strip()
        rating = regex.search(str(soup))[2]
        hood = regex.search(str(soup))[3]
    elif regex2.search(str(soup)):
        name = regex2.search(str(soup))[1].strip()
        rating = regex2.search(str(soup))[2]
        hood = regex2.search(str(soup))[3]
    elif soup.find('strong') and re.search('\*{1,5}',str(soup)) and re.search("\d+ [\w ()'.]+, ([\w (),]+);",str(soup)):
        name = soup.find('strong').get_text()
        rating = re.search('\*{1,5}',str(soup))[0]
        hood = re.search("\d+ [\w ()'.]+, ([\w (),]+);",str(soup))[1]
    elif soup.find('strong'):
        name = soup.find('strong').get_text()
        rating = soup.find('strong').find_parent().find_next_sibling().get_text()
        hood = soup.find('strong').find_parent().find_next_sibling().find_next_sibling().get_text()
    else:
        name = float('nan')
        rating = float('nan')
        hood = float('nan')    
    
    #Another Name Function (Maks)
    if name == 'nan':
        for par in soup.find_all('p', class_='story-body-text story-content'):
            string = str(par.get_text().strip())
            res = re.search('(^[A-Za-z]+)*$', string).group(1)
            if res:
                name = res
                
    #Atmosphere
    try:
        atmosphere = re.search('[ATMOSPHERatmospher]{10}[: -]+([\w\d \-,;\(\)]+).',str(soup))[1]
    except:
        atmosphere = float('nan')
    
    #Critic pick T/F
    cpick = float('nan')
    
    #Sound
    try:
        sound = re.search('[SOUNDsound]{5} [LEVlev]{5}[: -]+([\w\d \-,;\(\)]+).',str(soup))[1]
    except:
        sound = float('nan')
    
    #Menu Recommendations
    try:
        recs = re.search('[RECOMNDecomnd]{11} [DISHEdishe]{6}[: -]+([[\w\d \-,;\(\)]+).',str(soup))[1]
    except:
        recs = float('nan')
    
    #Menu Link
    menu_link = float('nan')
    
    #Drinks
    drinks = float('nan')
    
    #Price
    try:
        price = re.search("[PRICEprice]{5} [RANGErange]{5}[: -]+([\w\d \-,;\(\)$']+).", str(soup))[1]
    except:
        price = float('nan')
    
    #Hours
    try:
        hours = re.search("[HOURS]{5}[: -]+([\w\d \-,;\(\)$':]+).",str(soup))[1]
    except:
        hours = float('nan')
    
    #Reservations
    try:
        resis = re.search("[RESVATION]{12}[: -]+([\w\d \-,;\(\)$':]+).", str(soup))[1]
    except:
        resis = float('nan')
    
    #Review Text
    review = []
    for p in soup.find_all('p', class_='story-body-text story-content'):
        review.append(p.get_text())
    article = ' '.join(review)
    
    
    
    rev_dict = {'name': name,
                'title': title,
                'review_url': url,
                'review_date': rev_date, 
                'reviewer': reviewer, 
                'rating': rating, 
                'neighborhood': hood,
                'critic_pick': cpick,
                'atmosphere': atmosphere,
                'sound': sound,
                'recommendations': recs,
                'menu': menu_link,
                'drinks': drinks,
                'price': price,
                'hours': hours,
                'reservations': resis,
                'review': article,
                'article_id': article_id,
                'keywords': keywords}
    
    return rev_dict 

In [235]:
def parse_urls(urls):
    
    list_of_dicts = []
    error_dict = []
    
    recent_count = 0
    modern_count = 0
    archive_count = 0
    broken_modern_reviews = 0
    
    for review_url in urls:
        
        try:
            time.sleep(2)

            # GET HTML
            r = requests.get(review_url)
            c = r.content

            # CREATE BeautifulSoup Object
            soup = BeautifulSoup(c, 'html.parser')

            # "Check" object: If this object exists, the NYT Review is Recent
            check_new = soup.find('article').find('div', class_='css-53u6y8')

            # "Check" object: If this object exists, the NYT Review is an Archive
            try:
                check_archived = soup.find('span', class_='kicker-label').find('a', text='Archives')
            except:
                pass
            
            # Check object: If this object exists, the NYT Review is an annoying, 'Modern' format
            check_broken_modern = soup.find('aside', class_='review-details restaurant-details')
            
            # Check if Recent
            if check_new == None:

                # If Not Recent, Check if Archived
                if check_archived == None:
                    
                    # If not Archived, check if Broken Modern
                    if check_broken_modern == None:
                        print('Broken Modern Review: ', review_url)
                        list_of_dicts.append(parse_archived_reviews(soup))
                        broken_modern_reviews += 1
                            
                    else:
                        
                        print('Modern Review: ', review_url)
                        list_of_dicts.append(parse_modern_reviews(soup))
                        modern_count += 1

                # If Archived:
                else:
                    print('Archived Review: ', review_url)
                    list_of_dicts.append(parse_archived_reviews(soup))
                    archive_count += 1

            # If Recent
            else:
                print('Recent Review: ', review_url)
                list_of_dicts.append(parse_recent_reviews(soup))
                recent_count += 1
            
        except:
            print('***Error: ', review_url)
            error_dict.append(review_url)
            
    return list_of_dicts, len(error_dict), error_dict, recent_count, modern_count, archive_count

In [236]:
list_of_dicts, error_dict, error_dict, recent_count, modern_count, archive_count = parse_urls(review_urls)

Broken Modern Review:  https://www.nytimes.com/2006/02/08/dining/reviews/08rest.html
Modern Review:  https://www.nytimes.com/2010/04/07/dining/reviews/07rest.html
Archived Review:  https://www.nytimes.com/2005/09/28/dining/reviews/a-new-place-where-new-isnt-the-goal.html
Modern Review:  https://www.nytimes.com/2008/07/02/dining/reviews/02rest.html
Modern Review:  https://www.nytimes.com/2010/07/14/dining/reviews/14rest.html
Archived Review:  https://www.nytimes.com/1999/09/22/dining/restaurants-where-the-hip-gather-and-the-food-is-fun.html
Modern Review:  https://www.nytimes.com/2011/09/07/dining/reviews/craft-nyc-restaurant-review.html
Modern Review:  https://www.nytimes.com/2006/06/28/dining/reviews/28rest.html
Modern Review:  https://www.nytimes.com/2012/03/14/dining/reviews/acme-signals-the-arrival-of-new-nordic-cuisine.html
Archived Review:  https://www.nytimes.com/2004/03/31/dining/restaurants-aromas-of-wood-smoke-raising-hopes.html
Archived Review:  https://www.nytimes.com/2002/

Archived Review:  https://www.nytimes.com/2006/08/23/dining/reviews/a-turkish-chef-playing-hideandseek.html
Archived Review:  https://www.nytimes.com/2005/12/07/dining/reviews/looking-for-a-nod-by-winking-at-italy.html
Broken Modern Review:  https://www.nytimes.com/2009/08/05/dining/reviews/05rest.html
Archived Review:  https://www.nytimes.com/2000/06/07/dining/restaurants-the-food-speaks-for-itself-and-says-it-all.html
Modern Review:  https://www.nytimes.com/2011/05/04/dining/reviews/04rest.html
Archived Review:  https://www.nytimes.com/2000/08/23/dining/restaurants-behind-plain-decor-a-bold-kitchen.html
Recent Review:  https://www.nytimes.com/2017/01/24/dining/fowler-and-wells-restaurant-review-beekman-hotel.html
Modern Review:  https://www.nytimes.com/2011/06/01/dining/reviews/tenpenny-in-midtown-nyc-restaurant-review.html
Modern Review:  https://www.nytimes.com/2010/06/23/dining/reviews/23rest.html
Recent Review:  https://www.nytimes.com/2016/01/13/dining/pete-wells-per-se-review.h

Archived Review:  https://www.nytimes.com/2004/01/28/dining/restaurants-tapas-for-really-close-friends.html
Recent Review:  https://www.nytimes.com/2016/09/28/dining/sauvage-restaurant-review.html
Archived Review:  https://www.nytimes.com/2002/04/10/dining/restaurants-where-the-end-of-the-meal-is-the-beginning.html
Archived Review:  https://www.nytimes.com/2003/07/02/dining/restaurants-wine-for-appetizer-entree-and-dessert.html
***Error:  https://www.nytimes.com/2003/07/02/dining/restaurants-wine-for-appetizer-entree-and-dessert.html
Modern Review:  https://www.nytimes.com/2010/09/22/dining/reviews/22rest.html
Recent Review:  https://www.nytimes.com/2018/02/06/dining/grant-achatz-office-aviary-review.html
Recent Review:  https://www.nytimes.com/2018/01/09/dining/ferris-review-chelsea.html
Archived Review:  https://www.nytimes.com/2005/10/05/dining/reviews/baby-steps-on-a-road-of-good-intentions.html
Modern Review:  https://www.nytimes.com/2008/08/06/dining/reviews/06rest.html
Modern Re

Modern Review:  https://www.nytimes.com/2008/10/08/dining/reviews/08rest.html
Modern Review:  https://www.nytimes.com/2008/12/10/dining/reviews/10rest.html
Modern Review:  https://www.nytimes.com/2007/04/04/dining/reviews/04rest.html
***Error:  https://www.nytimes.com/2007/04/04/dining/reviews/04rest.html
Modern Review:  https://www.nytimes.com/2008/04/30/dining/reviews/30rest.html
Archived Review:  https://www.nytimes.com/2003/11/26/dining/restaurants-a-tiny-box-sparkling-with-seafood.html
Archived Review:  https://www.nytimes.com/2001/09/19/dining/restaurants-an-alliance-of-sun-dappled-cuisines.html
Archived Review:  https://www.nytimes.com/2002/10/09/dining/restaurants-two-chefs-free-to-cook-as-they-please.html
Archived Review:  https://www.nytimes.com/2000/06/21/dining/restaurants-curiouser-and-curiouser-chapter-2.html
Recent Review:  https://www.nytimes.com/2017/01/17/dining/ichiran-review-ramen-brooklyn.html
Modern Review:  https://www.nytimes.com/2009/11/11/dining/reviews/11rest

Modern Review:  https://www.nytimes.com/2009/11/04/dining/reviews/04rest.html
Archived Review:  https://www.nytimes.com/2004/10/20/dining/reviews/where-old-ghosts-fight-for-a-table.html
Archived Review:  https://www.nytimes.com/2006/12/27/dining/reviews/tough-love-at-the-sushi-bar.html
Modern Review:  https://www.nytimes.com/2007/12/05/dining/reviews/05rest.html
Archived Review:  https://www.nytimes.com/2001/03/14/dining/restaurants-in-new-york-promise-fulfilled.html
Broken Modern Review:  https://www.nytimes.com/2006/03/01/dining/reviews/01rest.html
Recent Review:  https://www.nytimes.com/2015/10/28/dining/o-ya-review.html
Modern Review:  https://www.nytimes.com/2009/08/12/dining/reviews/12rest.html
Broken Modern Review:  https://www.nytimes.com/2006/11/15/dining/reviews/15rest.html
Modern Review:  https://www.nytimes.com/2011/07/06/dining/reviews/the-dutch-nyc-restaurant-review.html
Broken Modern Review:  https://www.nytimes.com/2006/06/14/dining/reviews/14rest.html
Archived Review: 

Archived Review:  https://www.nytimes.com/2000/07/26/dining/restaurants-taking-fusion-cuisine-into-new-territory.html
Modern Review:  https://www.nytimes.com/2007/02/07/dining/reviews/07rest.html
Modern Review:  https://www.nytimes.com/2007/01/31/dining/reviews/31rest.html
Archived Review:  https://www.nytimes.com/2004/03/03/dining/restaurants-solid-and-cozy-short-on-ostentation.html
Archived Review:  https://www.nytimes.com/2003/02/05/dining/restaurants-a-new-face-in-the-trattoria-crowd.html
Archived Review:  https://www.nytimes.com/1999/10/13/dining/restaurants-in-tribeca-simplicity-and-soft-edges.html
Modern Review:  https://www.nytimes.com/2009/06/10/dining/reviews/10rest.html
Archived Review:  https://www.nytimes.com/1999/09/29/dining/restaurants-from-northern-italy-without-the-frills.html
Archived Review:  https://www.nytimes.com/2005/01/12/dining/fish-thats-raw-but-never-undressed.html
Recent Review:  https://www.nytimes.com/2017/01/31/dining/chumleys-review-bar-west-village.htm

Archived Review:  https://www.nytimes.com/2004/06/16/dining/restaurants-a-menu-as-prodigious-as-the-prices.html
Archived Review:  https://www.nytimes.com/1998/12/30/dining/restaurants-a-feast-of-hong-kong-delicacies.html
Archived Review:  https://www.nytimes.com/2005/03/16/dining/reviews/only-the-four-stars-remain-constant.html
Archived Review:  https://www.nytimes.com/1999/07/28/dining/restaurants-brash-and-latin-tailor-made-for-its-spot.html
Recent Review:  https://www.nytimes.com/2018/12/11/dining/best-restaurant-nyc-pete-wells.html
***Error:  https://www.nytimes.com/2018/12/11/dining/best-restaurant-nyc-pete-wells.html
Recent Review:  https://www.nytimes.com/2018/06/12/dining/shabushabu-macoron-review.html
Broken Modern Review:  https://www.nytimes.com/2009/09/02/dining/02nlede.html
***Error:  https://www.nytimes.com/2009/09/02/dining/02nlede.html
Archived Review:  https://www.nytimes.com/2002/12/18/dining/restaurants-heading-up-broadway-toward-provence.html
Archived Review:  https

Modern Review:  https://www.nytimes.com/2012/06/06/dining/reviews/restaurant-review-neta-in-greenwich-village.html
Broken Modern Review:  https://www.nytimes.com/2009/09/02/dining/02tien.html
***Error:  https://www.nytimes.com/2009/09/02/dining/02tien.html
Modern Review:  https://www.nytimes.com/2008/07/30/dining/reviews/30rest.html
Archived Review:  https://www.nytimes.com/2006/08/02/dining/reviews/food-youd-almost-rather-hug-than-eat.html
Modern Review:  https://www.nytimes.com/2009/11/18/dining/reviews/18rest.html
Archived Review:  https://www.nytimes.com/2001/12/26/dining/restaurants-an-inventive-menu-that-travels-the-map.html
Archived Review:  https://www.nytimes.com/1997/10/29/dining/restaurants-caviar-by-the-mother-of-pearl-spoonful.html
Modern Review:  https://www.nytimes.com/2006/07/12/dining/reviews/12rest.html
Archived Review:  https://www.nytimes.com/1999/11/10/dining/restaurants-cafe-dining-with-a-touch-of-cheek.html
Archived Review:  https://www.nytimes.com/2000/05/10/din

Broken Modern Review:  https://www.nytimes.com/2009/09/02/dining/02tony.html
***Error:  https://www.nytimes.com/2009/09/02/dining/02tony.html
Archived Review:  https://www.nytimes.com/1999/01/13/dining/restaurants-a-menu-big-on-words-and-flavors.html
Archived Review:  https://www.nytimes.com/2000/10/25/dining/restaurants-twisting-the-classics-italian-goes-nuova.html
Modern Review:  https://www.nytimes.com/2006/06/07/dining/reviews/07rest.html
Broken Modern Review:  https://www.nytimes.com/2005/06/01/dining/reviews/a-pastry-chef-unleashed.html
Archived Review:  https://www.nytimes.com/2002/09/25/dining/restaurants-latin-american-cuisine-takes-to-the-stage.html
Recent Review:  https://www.nytimes.com/2017/03/28/dining/augustine-review-french-food-nyc-keith-mcnally.html
Archived Review:  https://www.nytimes.com/1998/06/24/dining/restaurants-a-clairvoyant-in-the-kitchen.html
Broken Modern Review:  https://www.nytimes.com/2010/05/19/dining/reviews/19rest.html
Broken Modern Review:  https://

Modern Review:  https://www.nytimes.com/2008/03/19/dining/reviews/19rest.html
Archived Review:  https://www.nytimes.com/1998/12/23/dining/restaurants-a-nobu-where-you-can-just-drop-in.html
Archived Review:  https://www.nytimes.com/1999/06/30/dining/restaurants-a-rural-italian-stage-a-complicated-script.html
Archived Review:  https://www.nytimes.com/2004/09/15/dining/reviews/a-japanese-secret-fresh-and-simple.html
Recent Review:  https://www.nytimes.com/2016/02/10/dining/llama-inn-review.html
Recent Review:  https://www.nytimes.com/2016/05/18/dining/momofuku-nishi-review.html
Archived Review:  https://www.nytimes.com/2004/09/08/dining/the-magic-of-napa-with-urban-polish.html
Archived Review:  https://www.nytimes.com/2001/06/06/dining/restaurants-lutece-leaves-the-past-and-steps-into-the-future.html
Recent Review:  https://www.nytimes.com/2017/05/09/dining/atoboy-review-korean-restaurant-nyc.html
Broken Modern Review:  https://www.nytimes.com/2012/12/26/dining/reviews/12-restaurant-trium

Archived Review:  https://www.nytimes.com/1999/09/01/dining/restaurants-even-the-doggy-bags-really-care.html
Recent Review:  https://www.nytimes.com/2018/05/22/dining/wokuni-sushi-japanese-restaurant.html
Archived Review:  https://www.nytimes.com/2002/10/23/dining/restaurants-rustic-portuguese-with-no-apologies.html
Archived Review:  https://www.nytimes.com/2005/06/22/dining/reviews/not-the-place-to-wear-hot-pink.html
Modern Review:  https://www.nytimes.com/2011/06/08/dining/reviews/imperial-no-nine-nyc-restaurant-review.html
Archived Review:  https://www.nytimes.com/2001/08/22/dining/restaurants-midtown-playground-for-an-uptown-chef.html
Archived Review:  https://www.nytimes.com/2001/02/21/dining/restaurants-a-flight-of-fancy-begins-in-portugal.html
Archived Review:  https://www.nytimes.com/2001/10/03/dining/restaurants-now-they-ll-even-cook-it-for-you.html
Modern Review:  https://www.nytimes.com/2009/06/03/dining/reviews/03rest.html
Modern Review:  https://www.nytimes.com/2008/06/04/

Modern Review:  https://www.nytimes.com/2007/07/04/dining/reviews/04rest.html
Modern Review:  https://www.nytimes.com/2007/01/10/dining/reviews/10rest.html
Archived Review:  https://www.nytimes.com/2005/10/26/dining/yelping-warriors-and-rocks-in-the-broth.html
Recent Review:  https://www.nytimes.com/2018/10/30/dining/henry-at-life-hotel-by-jj-review.html
Modern Review:  https://www.nytimes.com/2010/06/02/dining/reviews/02rest.html
Archived Review:  https://www.nytimes.com/2002/04/24/dining/restaurants-a-japanese-french-hybrid-in-little-bites.html
Archived Review:  https://www.nytimes.com/1998/11/11/dining/restaurants-redecorated-ambitious-and-neighborly.html
Archived Review:  https://www.nytimes.com/2004/05/05/dining/restaurants-spaetzle-and-schnitzel-unclouded-by-foam.html
Recent Review:  https://www.nytimes.com/2016/10/05/dining/carla-hall-southern-kitchen-review.html
Modern Review:  https://www.nytimes.com/2006/05/17/dining/reviews/17rest.html
Archived Review:  https://www.nytimes.c

Modern Review:  https://www.nytimes.com/2010/04/28/dining/reviews/28rest.html
Archived Review:  https://www.nytimes.com/2004/03/10/dining/restaurants-fusion-as-it-used-to-be-but-more-exotic.html
Recent Review:  https://www.nytimes.com/2017/07/25/dining/atla-review-mexican-restaurant-noho.html
Archived Review:  https://www.nytimes.com/2001/03/21/dining/restaurants-something-for-everyone-on-every-plate.html
Modern Review:  https://www.nytimes.com/2010/04/21/dining/reviews/21rest.html
Modern Review:  https://www.nytimes.com/2010/01/27/dining/reviews/27rest.html
Archived Review:  https://www.nytimes.com/2001/02/28/dining/restaurants-a-simple-equation-for-the-financial-district.html
Broken Modern Review:  https://www.nytimes.com/2009/09/02/dining/02later.html
Modern Review:  https://www.nytimes.com/2012/08/29/dining/reviews/rosemarys-in-greenwich-village.html
Modern Review:  https://www.nytimes.com/2008/02/13/dining/reviews/13rest.html
Modern Review:  https://www.nytimes.com/2011/04/27/dini

In [237]:
len(list_of_dicts)

952

In [63]:
df.to_csv('all_reviews.csv', encoding='utf-8')

In [96]:
df_all.shape

(967, 18)

In [97]:
df_all.isnull().sum()

article_id           0
atmosphere         177
critic_pick        486
drinks             624
hours              186
keywords           486
menu               549
name               210
neighborhood       486
price               49
rating             401
recommendations    178
reservations       345
review               0
review_date          0
review_url           0
reviewer             2
sound              180
dtype: int64

In [68]:
df_og = pd.read_csv('reviews.csv', index_col='Unnamed: 0')

In [69]:
df_og = df_og.rename(columns={'review_link_1': 'review_url'})

In [78]:
df_og = df_og.drop_duplicates(subset='review_url')

In [79]:
df_og.shape

(518, 8)

In [80]:
df_og['review_url'].value_counts()

https://www.nytimes.com/2003/06/25/dining/restaurants-the-light-of-nice-shines-on-the-west-side.html                                          1
https://www.nytimes.com/2010/08/04/dining/reviews/04rest.html                                                                                 1
https://www.nytimes.com/2008/11/05/dining/reviews/05rest.html                                                                                 1
https://www.nytimes.com/2010/09/08/dining/reviews/08under.html                                                                                1
https://www.nytimes.com/2015/03/25/dining/restaurant-review-little-park-in-tribeca.html                                                       1
https://www.nytimes.com/2010/02/17/dining/reviews/17rest.html                                                                                 1
https://www.nytimes.com/2005/11/23/dining/reviews/cinderella-and-her-popular-stepsister.html                                            

In [None]:
review_dict, len_errors, error_dict, recent_count, modern_count, archive_count = parse_urls(df_og['review_url'])

Broken Modern Review:  https://www.nytimes.com/2006/02/08/dining/reviews/08rest.html
Modern Review:  https://www.nytimes.com/2010/04/07/dining/reviews/07rest.html
Archived Review:  https://www.nytimes.com/2005/09/28/dining/reviews/a-new-place-where-new-isnt-the-goal.html
Modern Review:  https://www.nytimes.com/2008/07/02/dining/reviews/02rest.html
Modern Review:  https://www.nytimes.com/2010/07/14/dining/reviews/14rest.html
Archived Review:  https://www.nytimes.com/1999/09/22/dining/restaurants-where-the-hip-gather-and-the-food-is-fun.html
Modern Review:  https://www.nytimes.com/2011/09/07/dining/reviews/craft-nyc-restaurant-review.html
Modern Review:  https://www.nytimes.com/2006/06/28/dining/reviews/28rest.html
Modern Review:  https://www.nytimes.com/2012/03/14/dining/reviews/acme-signals-the-arrival-of-new-nordic-cuisine.html
Archived Review:  https://www.nytimes.com/2004/03/31/dining/restaurants-aromas-of-wood-smoke-raising-hopes.html
Archived Review:  https://www.nytimes.com/2002/

In [16]:
df_og_urls = pd.read_csv('reviews.csv')

In [17]:
df_og_urls = df_og_urls['review_link_1']

In [81]:
df_og.shape

(518, 8)

In [18]:
review_dict, len_errors, error_dict, recent_count, modern_count, archive_count = parse_urls(df_og_urls)

Recent Review:  https://www.nytimes.com/2019/05/21/dining/hanon-review.html
Recent Review:  https://www.nytimes.com/2019/05/14/dining/del-posto-review-pete-wells.html
Recent Review:  https://www.nytimes.com/2019/05/07/dining/the-freakin-rican-restaurant-review.html
Recent Review:  https://www.nytimes.com/2019/04/23/dining/wayan-restaurant-review.html
Recent Review:  https://www.nytimes.com/2019/04/16/dining/niche-review-mazemen.html
Recent Review:  https://www.nytimes.com/2019/04/09/dining/haenyeo-restaurant-review-jenny-kwak.html
Recent Review:  https://www.nytimes.com/2019/04/02/dining/standard-grill-review.html
Recent Review:  https://www.nytimes.com/2019/03/26/dining/violet-pizza-review.html
Recent Review:  https://www.nytimes.com/2019/03/19/dining/odo-restaurant-review.html
Recent Review:  https://www.nytimes.com/2019/03/12/dining/cka-ka-qellu-review.html
Recent Review:  https://www.nytimes.com/2019/03/05/dining/madame-vo-bbq-review.html
Recent Review:  https://www.nytimes.com/201

Recent Review:  https://www.nytimes.com/2017/04/11/dining/torishin-review-japanese-yakitori.html
Recent Review:  https://www.nytimes.com/2017/04/04/dining/babbo-review-pete-wells.html
Recent Review:  https://www.nytimes.com/2017/03/28/dining/augustine-review-french-food-nyc-keith-mcnally.html
Recent Review:  https://www.nytimes.com/2017/03/21/dining/sugarfish-by-sushi-nozawa-review.html
Recent Review:  https://www.nytimes.com/2017/03/14/dining/franklin-barbecue-review.html
Recent Review:  https://www.nytimes.com/2017/02/21/dining/sahib-review-indian-restaurant-nyc.html
Recent Review:  https://www.nytimes.com/2017/02/14/dining/chinese-tuxedo-restaurant-review.html
Recent Review:  https://www.nytimes.com/2017/01/31/dining/chumleys-review-bar-west-village.html
Recent Review:  https://www.nytimes.com/2017/01/24/dining/fowler-and-wells-restaurant-review-beekman-hotel.html
Recent Review:  https://www.nytimes.com/2017/01/17/dining/ichiran-review-ramen-brooklyn.html
Recent Review:  https://www

Recent Review:  https://www.nytimes.com/2015/02/18/dining/restaurant-review-shuko-in-the-east-village.html
Recent Review:  https://www.nytimes.com/2015/02/04/dining/restaurant-review-cosme-in-the-flatiron-district.html
Recent Review:  https://www.nytimes.com/2015/01/28/dining/restaurant-review-blue-smoke-and-north-end-grill.html
Recent Review:  https://www.nytimes.com/2015/01/14/dining/restaurant-review-upland-on-park-avenue-south.html
Recent Review:  https://www.nytimes.com/2015/01/07/dining/restaurant-review-kappo-masa-on-the-upper-east-side.html
Recent Review:  https://www.nytimes.com/2014/12/10/dining/restaurant-review-dirty-french-on-the-lower-east-side.html
Recent Review:  https://www.nytimes.com/2014/12/03/dining/restaurant-review-danny-meyers-marta-in-nomad.html
Recent Review:  https://www.nytimes.com/2014/11/26/dining/restaurant-review-bar-bolonat-in-the-west-village.html
Recent Review:  https://www.nytimes.com/2014/11/12/dining/restaurant-review-dumpling-galaxy-in-queens.html

Recent Review:  https://www.nytimes.com/2013/05/15/dining/reviews/restaurant-review-caravaggio-on-the-italian-upper-east-side.html
Recent Review:  https://www.nytimes.com/2013/04/24/dining/reviews/restaurant-review-randazzos-clam-bar-in-sheepshead-bay.html
Recent Review:  https://www.nytimes.com/2013/04/03/dining/reviews/restaurant-review-hanjan-in-manhattan.html
Recent Review:  https://www.nytimes.com/2013/03/27/dining/reviews/restaurant-review-the-dining-room-at-the-modern.html
Recent Review:  https://www.nytimes.com/2013/03/20/dining/reviews/restaurant-review-chez-sardine-in-the-west-village.html
Recent Review:  https://www.nytimes.com/2013/03/13/dining/reviews/restaurant-review-jeepney-in-the-east-village-pig-and-khao-on-the-lower-east-side.html
Recent Review:  https://www.nytimes.com/2013/03/13/dining/reviews/restaurant-review-jeepney-in-the-east-village-pig-and-khao-on-the-lower-east-side.html
Recent Review:  https://www.nytimes.com/2013/03/06/dining/reviews/restaurant-review-mig

Modern Review:  https://www.nytimes.com/2010/11/10/dining/reviews/10rest.html
Modern Review:  https://www.nytimes.com/2010/10/27/dining/reviews/27rest.html
Modern Review:  https://www.nytimes.com/2010/10/06/dining/06rest.html
Modern Review:  https://www.nytimes.com/2010/09/08/dining/reviews/08under.html
Modern Review:  https://www.nytimes.com/2010/08/18/dining/reviews/18rest.html
Modern Review:  https://www.nytimes.com/2010/08/04/dining/reviews/04rest.html
Modern Review:  https://www.nytimes.com/2010/07/28/dining/reviews/28rest.html
Modern Review:  https://www.nytimes.com/2010/07/14/dining/reviews/14rest.html
Modern Review:  https://www.nytimes.com/2010/06/30/dining/reviews/30rest.html
Modern Review:  https://www.nytimes.com/2010/06/16/dining/reviews/16rest.html
Modern Review:  https://www.nytimes.com/2010/06/02/dining/reviews/02rest.html
Modern Review:  https://www.nytimes.com/2010/05/26/dining/reviews/26rest.html
Modern Review:  https://www.nytimes.com/2010/04/28/dining/reviews/28res



Modern Review:  https://www.nytimes.com/2008/04/30/dining/reviews/30rest.html
Modern Review:  https://www.nytimes.com/2008/03/19/dining/reviews/19rest.html
Modern Review:  https://www.nytimes.com/2008/03/12/dining/reviews/12rest.html
Modern Review:  https://www.nytimes.com/2008/02/13/dining/reviews/13rest.html
Modern Review:  https://www.nytimes.com/2008/01/30/dining/reviews/30rest.html
Modern Review:  https://www.nytimes.com/2008/01/16/dining/reviews/16rest.html
Modern Review:  https://www.nytimes.com/2008/01/09/dining/reviews/09rest.html
Modern Review:  https://www.nytimes.com/2007/11/14/dining/reviews/14rest.html
Modern Review:  https://www.nytimes.com/2007/10/24/dining/reviews/24rest.html
Modern Review:  https://www.nytimes.com/2007/09/26/dining/reviews/26rest.html
Modern Review:  https://www.nytimes.com/2007/09/19/dining/reviews/19rest.html
Modern Review:  https://www.nytimes.com/2007/09/05/dining/reviews/05rest.html
Modern Review:  https://www.nytimes.com/2007/08/22/dining/review

Archived Review:  https://www.nytimes.com/2004/02/25/dining/restaurants-a-lavish-ice-palace-high-above-the-din.html
Archived Review:  https://www.nytimes.com/2004/02/04/dining/restaurants-he-s-back-kumquats-cornflakes-and-all.html
Archived Review:  https://www.nytimes.com/2004/01/14/dining/restaurants-straightforward-fare-and-license-to-linger.html
Archived Review:  https://www.nytimes.com/2003/12/31/dining/restaurants-west-side-trattoria-with-velvet-booth-style.html
Archived Review:  https://www.nytimes.com/2003/12/24/dining/restaurants-italy-north-to-south-before-the-curtain.html
Archived Review:  https://www.nytimes.com/2003/12/03/dining/restaurants-the-food-of-india-from-top-to-bottom.html
Archived Review:  https://www.nytimes.com/2003/07/16/dining/restaurants-on-49th-street-a-cool-breeze-from-mexico.html
Archived Review:  https://www.nytimes.com/2003/06/25/dining/restaurants-the-light-of-nice-shines-on-the-west-side.html
Archived Review:  https://www.nytimes.com/2003/03/12/dining/

In [82]:
df_update = pd.DataFrame(review_dict)

In [83]:
df_update = df_update.drop_duplicates(subset='review_url')

In [84]:
df_update.shape

(510, 18)

In [85]:
df_to_merge = df_update[['review_url', 'review', 'recommendations']]

In [86]:
df_to_merge.shape

(510, 3)

In [87]:
df_og.shape

(518, 8)

In [88]:
df_new = df_og.merge(df_to_merge, on='review_url', how='left')

In [89]:
df_new.shape

(518, 10)

In [92]:
df_new.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 518 entries, 0 to 517
Data columns (total 10 columns):
name               518 non-null object
rating             518 non-null object
review_date        518 non-null object
reviewer           518 non-null object
neighborhood       515 non-null object
cuisine            518 non-null object
review_url         518 non-null object
review_link_2      518 non-null object
review             510 non-null object
recommendations    436 non-null object
dtypes: object(10)
memory usage: 44.5+ KB


In [93]:
df_new.to_csv('df_updated_510_no_dupes.csv')

In [138]:
current_urls = [url for url in df_new['review_url']]

for review in list(df_new['review_link_2']):
    current_urls.append(review)

In [139]:
len(current_urls)

1036

In [156]:
df_all = pd.read_csv('all_reviews.csv', index_col='Unnamed: 0')

In [157]:
df_all.isnull().sum()

article_id           0
atmosphere         177
critic_pick        486
drinks             624
hours              186
keywords           486
menu               549
name               210
neighborhood       486
price               49
rating             401
recommendations    178
reservations       345
review               0
review_date          0
review_url           0
reviewer             2
sound              180
dtype: int64

In [158]:
df_all_na_dropped = df_all.dropna(subset=['name', 'rating'])

In [159]:
df_all_na_dropped.shape

(563, 18)

In [160]:
df_all_na_dropped = df_all_na_dropped.drop_duplicates(subset='review_url')

In [161]:
df_all_na_dropped.shape

(563, 18)

In [162]:
urls_to_add = []

for url in list(df_all_na_dropped['review_url']):

    if url not in current_urls:
        print(url)
        urls_to_add.append(url)

df_to_concat = df_all_na_dropped[df_all_na_dropped['review_url'].isin(urls_to_add)]

https://www.nytimes.com/2010/04/07/dining/reviews/07rest.html
https://www.nytimes.com/2008/07/02/dining/reviews/02rest.html
https://www.nytimes.com/2006/07/05/dining/reviews/from-delphi-by-way-of-cleveland.html
https://www.nytimes.com/2004/12/01/dining/reviews/greek-reinterpreted-in-manhattan.html
https://www.nytimes.com/2010/05/12/dining/reviews/12rest.html
https://www.nytimes.com/2009/03/11/dining/reviews/11rest.html
https://www.nytimes.com/2006/02/22/dining/reviews/22rest.html
https://www.nytimes.com/2005/05/04/dining/reviews/at-moma-dining-with-picasso.html
https://www.nytimes.com/2006/09/20/dining/reviews/hiding-in-notsoplain-sight.html
https://www.nytimes.com/2005/04/27/dining/reviews/a-fanciful-bistro-but-not-too-fancy.html
https://www.nytimes.com/2008/02/06/dining/reviews/06rest.html
https://www.nytimes.com/2007/12/26/dining/reviews/26rest.html
https://www.nytimes.com/2007/06/20/dining/reviews/20rest.html
https://www.nytimes.com/2011/06/22/dining/reviews/desmonds-nyc-restaurant

In [190]:
df_new.head()

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,cuisine,review_url,review_link_2,review,recommendations
0,Hanon,2 star,"May 21, 2019",Pete Wells,Williamsburg,Japanese,https://www.nytimes.com/2019/05/21/dining/hano...,https://www.nytimes.com/2019/05/21/dining/hano...,"Hanon, a new udon shop in Williamsburg, Brookl...",Japanese omelet; fried chicken with spice; ric...
1,Del Posto,3 star,"May 14, 2019",Pete Wells,Chelsea,Italian,https://www.nytimes.com/2019/05/14/dining/del-...,https://www.nytimes.com/2019/05/14/dining/del-...,"More than a year ago, I was on the verge of re...",Grilled salsify salad; vitello tonnato; minest...
2,The Freakin Rican,1 star,"May 7, 2019",Pete Wells,Astoria,"Caribbean, Latin American",https://www.nytimes.com/2019/05/07/dining/the-...,https://www.nytimes.com/2019/05/07/dining/the-...,It is true that the bacalaitos at the Freakin ...,Alcapurrias; pasteles; bacalaitos; chicharrone...
3,Wayan,2 star,"April 23, 2019",Pete Wells,NoLIta,Indonesian,https://www.nytimes.com/2019/04/23/dining/waya...,https://www.nytimes.com/2019/04/23/dining/waya...,If you ate at Spice Market before it closed tw...,Hearts-of-palm salad; clams Jimbaran style; av...
4,Niche,1 star,"April 16, 2019",Pete Wells,Lower East Side,Japanese,https://www.nytimes.com/2019/04/16/dining/nich...,https://www.nytimes.com/2019/04/16/dining/nich...,"In 2012 and 2013, when people were lining up a...",Avocado crunch; yuzu scallop crudo; umami komb...


In [191]:
df_new['review_date'] = pd.to_datetime(df_new['review_date'])

In [192]:
df_new.head()

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,cuisine,review_url,review_link_2,review,recommendations
0,Hanon,2 star,2019-05-21,Pete Wells,Williamsburg,Japanese,https://www.nytimes.com/2019/05/21/dining/hano...,https://www.nytimes.com/2019/05/21/dining/hano...,"Hanon, a new udon shop in Williamsburg, Brookl...",Japanese omelet; fried chicken with spice; ric...
1,Del Posto,3 star,2019-05-14,Pete Wells,Chelsea,Italian,https://www.nytimes.com/2019/05/14/dining/del-...,https://www.nytimes.com/2019/05/14/dining/del-...,"More than a year ago, I was on the verge of re...",Grilled salsify salad; vitello tonnato; minest...
2,The Freakin Rican,1 star,2019-05-07,Pete Wells,Astoria,"Caribbean, Latin American",https://www.nytimes.com/2019/05/07/dining/the-...,https://www.nytimes.com/2019/05/07/dining/the-...,It is true that the bacalaitos at the Freakin ...,Alcapurrias; pasteles; bacalaitos; chicharrone...
3,Wayan,2 star,2019-04-23,Pete Wells,NoLIta,Indonesian,https://www.nytimes.com/2019/04/23/dining/waya...,https://www.nytimes.com/2019/04/23/dining/waya...,If you ate at Spice Market before it closed tw...,Hearts-of-palm salad; clams Jimbaran style; av...
4,Niche,1 star,2019-04-16,Pete Wells,Lower East Side,Japanese,https://www.nytimes.com/2019/04/16/dining/nich...,https://www.nytimes.com/2019/04/16/dining/nich...,"In 2012 and 2013, when people were lining up a...",Avocado crunch; yuzu scallop crudo; umami komb...


In [165]:
df_to_concat = df_to_concat[['name', 'rating', 'review_date', 'reviewer', 'neighborhood', 'review_url', 'review']]
df_to_concat

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,review_url,review
1,Faustina,★,20100406,Sam Sifton,East Village,https://www.nytimes.com/2010/04/07/dining/revi...,JEAN-CLAUDE IZZO wrote a terrific series of Fr...
3,Bar Milano (CLOSED),★★,20080702,Frank Bruni,Kips Bay,https://www.nytimes.com/2008/07/02/dining/revi...,ITALIAN cooking is about a lot more than pasta...
15,Parea,Two Stars,20060705,Frank Bruni,,https://www.nytimes.com/2006/07/05/dining/revi...,FOR a while it seemed that most of the restaur...
19,Onera,two stars,20041201,Frank Bruni,,https://www.nytimes.com/2004/12/01/dining/revi...,THE bright lights of this big city stoke diffe...
26,Fatty 'Cue,★,20100511,Sam Sifton,Williamsburg,https://www.nytimes.com/2010/05/12/dining/revi...,"FATTY ’CUE, Zakary Pelaccio’s funky new barbec..."
29,10 Downing,★★,20090310,Frank Bruni,West Village,https://www.nytimes.com/2009/03/11/dining/revi...,WHEN last we left the restless young chef Jaso...
31,RECOMMENDED,DISHES,20060222,Frank Bruni,,https://www.nytimes.com/2006/02/22/dining/revi...,"JOHN LAFEMINA'S first restaurant, Peasant, rom..."
34,The Modern,Two Stars,20050504,Frank Bruni,,https://www.nytimes.com/2005/05/04/dining/revi...,"MANY New York restaurants boast great views, g..."
36,Freemans,SATISFACTORY,20060920,Frank Bruni,,https://www.nytimes.com/2006/09/20/dining/revi...,"IN this restaurant-packed city, all sorts of c..."
37,Florent,One Star,20050427,Frank Bruni,,https://www.nytimes.com/2005/04/27/dining/revi...,"ABOUT a year and a half ago, in a fit of unwar..."


In [167]:
df_to_concat.to_csv('df_to_input_manually.csv')

In [184]:
df_post_manual = pd.read_csv('df_post_manual.csv', index_col='Unnamed: 0')

In [185]:
df_post_manual

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,review_url,review
1,Faustina,★,20100406,Sam Sifton,East Village,https://www.nytimes.com/2010/04/07/dining/revi...,JEAN-CLAUDE IZZO wrote a terrific series of Fr...
3,Bar Milano,★★,20080702,Frank Bruni,Kips Bay,https://www.nytimes.com/2008/07/02/dining/revi...,ITALIAN cooking is about a lot more than pasta...
15,Parea,★★,20060705,Frank Bruni,Midtown South,https://www.nytimes.com/2006/07/05/dining/revi...,FOR a while it seemed that most of the restaur...
19,Onera,★★,20041201,Frank Bruni,Upper West Side,https://www.nytimes.com/2004/12/01/dining/revi...,THE bright lights of this big city stoke diffe...
26,Fatty 'Cue,★,20100511,Sam Sifton,Williamsburg,https://www.nytimes.com/2010/05/12/dining/revi...,"FATTY ’CUE, Zakary Pelaccio’s funky new barbec..."
29,10 Downing,★★,20090310,Frank Bruni,West Village,https://www.nytimes.com/2009/03/11/dining/revi...,WHEN last we left the restless young chef Jaso...
31,The Orchard,★★,20060222,Frank Bruni,Lower East Side,https://www.nytimes.com/2006/02/22/dining/revi...,"JOHN LAFEMINA'S first restaurant, Peasant, rom..."
34,The Modern,★★,20050504,Frank Bruni,Midtown,https://www.nytimes.com/2005/05/04/dining/revi...,"MANY New York restaurants boast great views, g..."
36,Freemans,Satisfactory,20060920,Frank Bruni,Lower East Side,https://www.nytimes.com/2006/09/20/dining/revi...,"IN this restaurant-packed city, all sorts of c..."
37,Florent,★,20050427,Frank Bruni,West Village,https://www.nytimes.com/2005/04/27/dining/revi...,"ABOUT a year and a half ago, in a fit of unwar..."


In [186]:
df_post_manual['review_date'] = pd.to_datetime(df_post_manual['review_date'], format='%Y%m%d')

In [187]:
df_post_manual.head()

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,review_url,review
1,Faustina,★,2010-04-06,Sam Sifton,East Village,https://www.nytimes.com/2010/04/07/dining/revi...,JEAN-CLAUDE IZZO wrote a terrific series of Fr...
3,Bar Milano,★★,2008-07-02,Frank Bruni,Kips Bay,https://www.nytimes.com/2008/07/02/dining/revi...,ITALIAN cooking is about a lot more than pasta...
15,Parea,★★,2006-07-05,Frank Bruni,Midtown South,https://www.nytimes.com/2006/07/05/dining/revi...,FOR a while it seemed that most of the restaur...
19,Onera,★★,2004-12-01,Frank Bruni,Upper West Side,https://www.nytimes.com/2004/12/01/dining/revi...,THE bright lights of this big city stoke diffe...
26,Fatty 'Cue,★,2010-05-11,Sam Sifton,Williamsburg,https://www.nytimes.com/2010/05/12/dining/revi...,"FATTY ’CUE, Zakary Pelaccio’s funky new barbec..."


In [194]:
df_new = df_new.drop(['review_link_2'], axis=1)

In [195]:
df_new.head()

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,cuisine,review_url,review,recommendations
0,Hanon,2 star,2019-05-21,Pete Wells,Williamsburg,Japanese,https://www.nytimes.com/2019/05/21/dining/hano...,"Hanon, a new udon shop in Williamsburg, Brookl...",Japanese omelet; fried chicken with spice; ric...
1,Del Posto,3 star,2019-05-14,Pete Wells,Chelsea,Italian,https://www.nytimes.com/2019/05/14/dining/del-...,"More than a year ago, I was on the verge of re...",Grilled salsify salad; vitello tonnato; minest...
2,The Freakin Rican,1 star,2019-05-07,Pete Wells,Astoria,"Caribbean, Latin American",https://www.nytimes.com/2019/05/07/dining/the-...,It is true that the bacalaitos at the Freakin ...,Alcapurrias; pasteles; bacalaitos; chicharrone...
3,Wayan,2 star,2019-04-23,Pete Wells,NoLIta,Indonesian,https://www.nytimes.com/2019/04/23/dining/waya...,If you ate at Spice Market before it closed tw...,Hearts-of-palm salad; clams Jimbaran style; av...
4,Niche,1 star,2019-04-16,Pete Wells,Lower East Side,Japanese,https://www.nytimes.com/2019/04/16/dining/nich...,"In 2012 and 2013, when people were lining up a...",Avocado crunch; yuzu scallop crudo; umami komb...


In [189]:
df_post_manual.head()

Unnamed: 0,name,rating,review_date,reviewer,neighborhood,review_url,review
1,Faustina,★,2010-04-06,Sam Sifton,East Village,https://www.nytimes.com/2010/04/07/dining/revi...,JEAN-CLAUDE IZZO wrote a terrific series of Fr...
3,Bar Milano,★★,2008-07-02,Frank Bruni,Kips Bay,https://www.nytimes.com/2008/07/02/dining/revi...,ITALIAN cooking is about a lot more than pasta...
15,Parea,★★,2006-07-05,Frank Bruni,Midtown South,https://www.nytimes.com/2006/07/05/dining/revi...,FOR a while it seemed that most of the restaur...
19,Onera,★★,2004-12-01,Frank Bruni,Upper West Side,https://www.nytimes.com/2004/12/01/dining/revi...,THE bright lights of this big city stoke diffe...
26,Fatty 'Cue,★,2010-05-11,Sam Sifton,Williamsburg,https://www.nytimes.com/2010/05/12/dining/revi...,"FATTY ’CUE, Zakary Pelaccio’s funky new barbec..."


In [196]:
df_final = pd.concat([df_new, df_post_manual], axis=0)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [198]:
df_final = df_final[['name', 'rating', 'review_date', 'reviewer', 'review_url', 'neighborhood', 'cuisine','recommendations', 'review']]

In [199]:
df_final

Unnamed: 0,name,rating,review_date,reviewer,review_url,neighborhood,cuisine,recommendations,review
0,Hanon,2 star,2019-05-21,Pete Wells,https://www.nytimes.com/2019/05/21/dining/hano...,Williamsburg,Japanese,Japanese omelet; fried chicken with spice; ric...,"Hanon, a new udon shop in Williamsburg, Brookl..."
1,Del Posto,3 star,2019-05-14,Pete Wells,https://www.nytimes.com/2019/05/14/dining/del-...,Chelsea,Italian,Grilled salsify salad; vitello tonnato; minest...,"More than a year ago, I was on the verge of re..."
2,The Freakin Rican,1 star,2019-05-07,Pete Wells,https://www.nytimes.com/2019/05/07/dining/the-...,Astoria,"Caribbean, Latin American",Alcapurrias; pasteles; bacalaitos; chicharrone...,It is true that the bacalaitos at the Freakin ...
3,Wayan,2 star,2019-04-23,Pete Wells,https://www.nytimes.com/2019/04/23/dining/waya...,NoLIta,Indonesian,Hearts-of-palm salad; clams Jimbaran style; av...,If you ate at Spice Market before it closed tw...
4,Niche,1 star,2019-04-16,Pete Wells,https://www.nytimes.com/2019/04/16/dining/nich...,Lower East Side,Japanese,Avocado crunch; yuzu scallop crudo; umami komb...,"In 2012 and 2013, when people were lining up a..."
5,Haenyeo,2 star,2019-04-09,Pete Wells,https://www.nytimes.com/2019/04/09/dining/haen...,Park Slope,Korean,"Crispy chicken wings; zucchini, perilla leaf a...",Looking around Haenyeo’s dining room one night...
6,Standard Grill,2 star,2019-04-02,Pete Wells,https://www.nytimes.com/2019/04/02/dining/stan...,West Village,American,Peconic Bay scallops and uni; wild yellowfin t...,I’m starting to think the American cult of che...
7,Violet,1 star,2019-03-26,Pete Wells,https://www.nytimes.com/2019/03/26/dining/viol...,East Village,"Italian, Pizza",Pimentón bacalao; smoked mackerel pâté; grille...,When I left my home state of Rhode Island at 1...
8,Odo,3 star,2019-03-19,Pete Wells,https://www.nytimes.com/2019/03/19/dining/odo-...,Flatiron district,Japanese,The set menus change monthly and offer few cho...,Before I tell you about the seasonally attuned...
9,Cka Ka Qellu,2 star,2019-03-12,Pete Wells,https://www.nytimes.com/2019/03/12/dining/cka-...,Belmont,Eastern European,Pickled pepper; sausage dip; tarator; kajmak; ...,The best way to enter the Albanian restaurant ...


In [200]:
df_final.shape

(739, 9)

In [201]:
df_final.to_csv('final_df_v1.csv')

In [206]:
for link in df_final[df_final['name'] == 'Craftsteak']['review_url']:
    print(link)

https://www.nytimes.com/2007/05/09/dining/reviews/09rest.html
https://www.nytimes.com/2006/07/12/dining/reviews/steaks-with-lots-of-asides.html
https://www.nytimes.com/2006/07/12/dining/reviews/12rest.html


In [216]:
df_final[df_final['review_date'] == '2006-05-17']

Unnamed: 0,name,rating,review_date,reviewer,review_url,neighborhood,cuisine,recommendations,review
403,Crema,1 star,2006-05-17,Frank Bruni,https://www.nytimes.com/2006/05/17/dining/revi...,Chelsea,Mexican,,A CHICKEN entree that Julieta Ballesteros serv...
106,Crema,★,2006-05-17,Frank Bruni,https://www.nytimes.com/2006/05/17/dining/revi...,Chelsea,,,A CHICKEN entree that Julieta Ballesteros serv...


In [217]:
df_final = df_final.drop_duplicates(subset='review_date')

In [218]:
df_final.shape

(713, 9)

In [219]:
df_final.to_csv('final_df_v2.csv')

In [351]:
df_final = pd.read_csv('final_df_v2.csv', index_col='Unnamed: 0')

In [352]:
df_final

Unnamed: 0,name,rating,review_date,reviewer,review_url,neighborhood,cuisine,recommendations,review
0,Hanon,2 star,2019-05-21,Pete Wells,https://www.nytimes.com/2019/05/21/dining/hano...,Williamsburg,Japanese,Japanese omelet; fried chicken with spice; ric...,"Hanon, a new udon shop in Williamsburg, Brookl..."
1,Del Posto,3 star,2019-05-14,Pete Wells,https://www.nytimes.com/2019/05/14/dining/del-...,Chelsea,Italian,Grilled salsify salad; vitello tonnato; minest...,"More than a year ago, I was on the verge of re..."
2,The Freakin Rican,1 star,2019-05-07,Pete Wells,https://www.nytimes.com/2019/05/07/dining/the-...,Astoria,"Caribbean, Latin American",Alcapurrias; pasteles; bacalaitos; chicharrone...,It is true that the bacalaitos at the Freakin ...
3,Wayan,2 star,2019-04-23,Pete Wells,https://www.nytimes.com/2019/04/23/dining/waya...,NoLIta,Indonesian,Hearts-of-palm salad; clams Jimbaran style; av...,If you ate at Spice Market before it closed tw...
4,Niche,1 star,2019-04-16,Pete Wells,https://www.nytimes.com/2019/04/16/dining/nich...,Lower East Side,Japanese,Avocado crunch; yuzu scallop crudo; umami komb...,"In 2012 and 2013, when people were lining up a..."
5,Haenyeo,2 star,2019-04-09,Pete Wells,https://www.nytimes.com/2019/04/09/dining/haen...,Park Slope,Korean,"Crispy chicken wings; zucchini, perilla leaf a...",Looking around Haenyeo’s dining room one night...
6,Standard Grill,2 star,2019-04-02,Pete Wells,https://www.nytimes.com/2019/04/02/dining/stan...,West Village,American,Peconic Bay scallops and uni; wild yellowfin t...,I’m starting to think the American cult of che...
7,Violet,1 star,2019-03-26,Pete Wells,https://www.nytimes.com/2019/03/26/dining/viol...,East Village,"Italian, Pizza",Pimentón bacalao; smoked mackerel pâté; grille...,When I left my home state of Rhode Island at 1...
8,Odo,3 star,2019-03-19,Pete Wells,https://www.nytimes.com/2019/03/19/dining/odo-...,Flatiron district,Japanese,The set menus change monthly and offer few cho...,Before I tell you about the seasonally attuned...
9,Cka Ka Qellu,2 star,2019-03-12,Pete Wells,https://www.nytimes.com/2019/03/12/dining/cka-...,Belmont,Eastern European,Pickled pepper; sausage dip; tarator; kajmak; ...,The best way to enter the Albanian restaurant ...


In [356]:
headlines = []

for url in df_final['review_url']:
    
    time.sleep(2)

    # GET HTML
    r = requests.get(url)
    c = r.content

    # CREATE BeautifulSoup Object
    soup = BeautifulSoup(c, 'html.parser')
    
    # Title
    bad_str = ' - The New York Times'
    bad_str_2 = 'Restaurant Review: '
    bad_str_3 = ' — NYC — Restaurant Review'
    
    title = soup.find('title').get_text().replace(bad_str,'')
    title = title.replace(bad_str_2,'')
    title = title.replace(bad_str_3, '')
    
    print(title)
    
    headlines.append(title)
    

Udon, Innovated for Your Pleasure
In Del Posto’s New Era, Cuisine and Service Are at Odds
Pasteles and Two Kinds of Fried Chicken at the Freakin Rican
Tapping a Family Connection to Indonesian Food
Ramen Without Broth? A Chef Doubles Down on a Sidelined Dish
With Haenyeo, a Trailblazing Korean Chef Takes On Seafood
The TV Chef Rocco DiSpirito Returns, With a Quieter Touch
What Has New York Pizza Been Missing? Little Old Rhode Island
Kaiseki, Straight Up With a Twist
A Deep-End Albanian Experience, Tucked Into the Bronx
A Beef Feast From Vietnam Gets a New York Showcase
Can a Pop-Up Settle Down Without Losing Its Fizz?
Serving All the Food Nouvelle Cuisine Couldn’t Kill
Refined British Restaurant Found Hiding in a Brooklyn Bar
Benno, Proudly Out of Step With the Age
Why Does This Fancy Shopping Mall Smell Like Street Meat?
Hwaban Is the Modern Korean Restaurant Where You’d Take Your Mother
What If Brexit Were a Restaurant?
The Four Seasons Returns. But Can It Come Back?
Seafood From Two

Bowery Meat Company in the East Village
Via Carota in the West Village
Shuko in the East Village
Cosme in the Flatiron District
Blue Smoke and North End Grill
Upland on Park Avenue South
Kappo Masa on the Upper East Side
Dirty French on the Lower East Side
Danny Meyer’s Marta in NoMad
Bar Bolonat in the West Village
Dumpling Galaxy in Queens
Tuome in the East Village
Blue Hill in Greenwich Village
Blenheim in the West Village
Huertas in the East Village
Sammy's Roumanian Steakhouse on the Lower East Side
Claudette in Greenwich Village
Barchetta in Chelsea
Bar Primi in the East Village
Bâtard in TriBeCa
Ivan Ramen
Russ & Daughters Cafe
Racines NY in TriBeCa
Grindhaus in Red Hook, Brooklyn
Tavern on the Green
Gato From Bobby Flay
The Simone on the Upper East Side
Narcissa in the Standard East Village
The River Café at Fulton Ferry
El Quinto Pino in Chelsea
Cagen in the East Village and Ristorante Morini on the Upper East Side
Telepan Local in TriBeCa
Jean-Georges on the Upper West Side
T

Tokyo in Midtown (English Optional)
Yelping Warriors, and Rocks in the Broth
Recapturing That Tour of Italy
Take My Steak. Please.
Tradition Two Ways: Plain or Fancy
Showmanship Yields to Elegance
A Secret Too Dark to Keep
Italian, Almost Home Cooked
Of Wood Floors and a Wood-Fired Oven
At the Intersection of France and Morocco
A Place in the Mood for Anything
Japanese Chic, With Volume Turned Up
Almost as Durable as Greece Itself
Where to Take Thoreau and Dr. Atkins
Confessions of a Reformed Sushi Eater
The Old Kid on the Block
Fish That's Raw, but Never Undressed
Italian Rustic, Just Off Broadway
An Oasis for Sheiks on the East Side
A Paean to Tofu in a Japanese Pub
Left or Right, a Place to Be Centered
A Thai Pilgrimage Leads to Queens
It Was Hot. It Was Cool. That Was Then.
Italy and Iberia, Consorting Amiably
In the Meatpacking District, Packed In
A Japanese Secret, Fresh and Simple
RESTAURANTS; A French Soul, Wrapped in a Picket Fence
RESTAURANTS; Eat Up, but Don't Tell Your Card

Machismo, Down to the Studs
New Restaurant Co. Showcases Pizza
At Monkey Bar, the Laws of the Jungle Apply
Greek Again, With Even More Passion
Rouge et Blanc
Reviewing Tanuki Tavern and Ed’s Chowder House
Maryland, Maybe, Maybe Not, at Choptank in Greenwich Village
Perla in Greenwich Village
Unflinching Indulgences From Belgium
A Plea for Respect for a Familiar Fish
A Review of La Mar Cebicheria Peruana in Manhattan
More Than Just a Sequel to a Noodle Bar
Seäsonal - NYC - Restaurant Review
M. Wells in Long Island City
Where the Lights Are Never Low
Chevalier in Midtown
Gus & Gabriel Gastropub
Allegretti: Away to the South of France
Frank Bruni on Momofuku Ko
Isa
Sam Sifton at A Voce Columbus
The John Dory: A British Seafood Restaurant Like No Other
Veritas
The John Dory Oyster Bar
A Review of Saul on Smith Street in Brooklyn
Keith McNally’s Cherche Midi
In This Climate, Comfort Food Trumps Creativity
Old MacDonald Had a Quail
Calliope in the East Village
A French Classic in a Brasserie

In [357]:
df_final['headline'] = headlines

In [358]:
def clean_headline(headline):
    
    bad_str1 = 'RESTAURANTS; '
    bad_str2 = 'Restaurants; '
    
    cleaned = headline.replace(bad_str1, '')
    cleaned = cleaned.replace(bad_str2, '')
    
    if headline == 'Restaurants':
        
        return float('NaN')
    
    return cleaned

df_final['headline'] = df_final['headline'].apply(lambda x: clean_headline(x))

In [359]:
df_final

Unnamed: 0,name,rating,review_date,reviewer,review_url,neighborhood,cuisine,recommendations,review,headline
0,Hanon,2 star,2019-05-21,Pete Wells,https://www.nytimes.com/2019/05/21/dining/hano...,Williamsburg,Japanese,Japanese omelet; fried chicken with spice; ric...,"Hanon, a new udon shop in Williamsburg, Brookl...","Udon, Innovated for Your Pleasure"
1,Del Posto,3 star,2019-05-14,Pete Wells,https://www.nytimes.com/2019/05/14/dining/del-...,Chelsea,Italian,Grilled salsify salad; vitello tonnato; minest...,"More than a year ago, I was on the verge of re...","In Del Posto’s New Era, Cuisine and Service Ar..."
2,The Freakin Rican,1 star,2019-05-07,Pete Wells,https://www.nytimes.com/2019/05/07/dining/the-...,Astoria,"Caribbean, Latin American",Alcapurrias; pasteles; bacalaitos; chicharrone...,It is true that the bacalaitos at the Freakin ...,Pasteles and Two Kinds of Fried Chicken at the...
3,Wayan,2 star,2019-04-23,Pete Wells,https://www.nytimes.com/2019/04/23/dining/waya...,NoLIta,Indonesian,Hearts-of-palm salad; clams Jimbaran style; av...,If you ate at Spice Market before it closed tw...,Tapping a Family Connection to Indonesian Food
4,Niche,1 star,2019-04-16,Pete Wells,https://www.nytimes.com/2019/04/16/dining/nich...,Lower East Side,Japanese,Avocado crunch; yuzu scallop crudo; umami komb...,"In 2012 and 2013, when people were lining up a...",Ramen Without Broth? A Chef Doubles Down on a ...
5,Haenyeo,2 star,2019-04-09,Pete Wells,https://www.nytimes.com/2019/04/09/dining/haen...,Park Slope,Korean,"Crispy chicken wings; zucchini, perilla leaf a...",Looking around Haenyeo’s dining room one night...,"With Haenyeo, a Trailblazing Korean Chef Takes..."
6,Standard Grill,2 star,2019-04-02,Pete Wells,https://www.nytimes.com/2019/04/02/dining/stan...,West Village,American,Peconic Bay scallops and uni; wild yellowfin t...,I’m starting to think the American cult of che...,"The TV Chef Rocco DiSpirito Returns, With a Qu..."
7,Violet,1 star,2019-03-26,Pete Wells,https://www.nytimes.com/2019/03/26/dining/viol...,East Village,"Italian, Pizza",Pimentón bacalao; smoked mackerel pâté; grille...,When I left my home state of Rhode Island at 1...,What Has New York Pizza Been Missing? Little O...
8,Odo,3 star,2019-03-19,Pete Wells,https://www.nytimes.com/2019/03/19/dining/odo-...,Flatiron district,Japanese,The set menus change monthly and offer few cho...,Before I tell you about the seasonally attuned...,"Kaiseki, Straight Up With a Twist"
9,Cka Ka Qellu,2 star,2019-03-12,Pete Wells,https://www.nytimes.com/2019/03/12/dining/cka-...,Belmont,Eastern European,Pickled pepper; sausage dip; tarator; kajmak; ...,The best way to enter the Albanian restaurant ...,"A Deep-End Albanian Experience, Tucked Into th..."


In [360]:
for revew in df_final['headline']:
    print(revew)

Udon, Innovated for Your Pleasure
In Del Posto’s New Era, Cuisine and Service Are at Odds
Pasteles and Two Kinds of Fried Chicken at the Freakin Rican
Tapping a Family Connection to Indonesian Food
Ramen Without Broth? A Chef Doubles Down on a Sidelined Dish
With Haenyeo, a Trailblazing Korean Chef Takes On Seafood
The TV Chef Rocco DiSpirito Returns, With a Quieter Touch
What Has New York Pizza Been Missing? Little Old Rhode Island
Kaiseki, Straight Up With a Twist
A Deep-End Albanian Experience, Tucked Into the Bronx
A Beef Feast From Vietnam Gets a New York Showcase
Can a Pop-Up Settle Down Without Losing Its Fizz?
Serving All the Food Nouvelle Cuisine Couldn’t Kill
Refined British Restaurant Found Hiding in a Brooklyn Bar
Benno, Proudly Out of Step With the Age
Why Does This Fancy Shopping Mall Smell Like Street Meat?
Hwaban Is the Modern Korean Restaurant Where You’d Take Your Mother
What If Brexit Were a Restaurant?
The Four Seasons Returns. But Can It Come Back?
Seafood From Two

In [361]:
df_final.to_csv('final_df_v3.csv')

In [365]:
review = df_final.iloc[134]['review']

In [366]:
import spacy
from spacy.lang.en import English
from spacy.lang.en.stop_words import STOP_WORDS
import en_core_web_sm

stop_entities = ['GPE', 'CARDINAL', 'PERSON', 'DATE']

def remove_named_entities(review):
    
    # Get Entities
    nlp = English()
    nlp = en_core_web_sm.load()
    
    doc = nlp(review)
    
    stop_entity_list = []
    entities=[(i, i.label_, i.label) for i in doc.ents]
    
    for entity in entities:
        
        if entity[1] in stop_entities:
            stop_entity_list.append(str(entity[0]).lower())

    return stop_entity_list

remove_named_entities(review)

['france',
 'half',
 'gérard depardieu',
 'jeggings',
 'france',
 'new york',
 'benoit',
 'alain ducasse',
 'century-old',
 'paris',
 'a few years',
 'midtown',
 'benoit',
 'frank bruni',
 '2008',
 'one',
 'a year later',
 'julia moskin',
 'ducasse',
 'benoit',
 'two',
 'one',
 '10',
 'bertineau',
 'argentina',
 'regulars',
 'opening day',
 'five',
 'france',
 '15 or 45 years ago',
 'benoit',
 '1892',
 'bertineau',
 'alain',
 'new york',
 'armagnac',
 'tatins',
 'ducasse',
 'new york',
 'manhattan']