# Data Exploration and NLP Modeling 
## By BROSSEAU Alexandre & COGORDAN Alexandre

In [9]:
import nltk
import requests
import time
import pandas as pd
import os
import re
import gensim
import gensim.corpora as corpora
import spacy
import pyLDAvis
import pyLDAvis.gensim
import matplotlib.pyplot as plt
import streamlit as st
import numpy as np
import tensorflow as tf
import tensorboard as tb

from scipy.spatial.distance import euclidean
from scipy.spatial.distance import cosine
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from textblob import TextBlob
from collections import Counter
from transformers import pipeline
from langchain import PromptTemplate, LLMChain
from dotenv import find_dotenv, load_dotenv
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel, Word2Vec
from sklearn.manifold import TSNE
from tensorboard.plugins import projector

load_dotenv()

True

## Web scraping

### We get the requests and the dataframe we've created so far

In [10]:
df = pd.read_csv('yelp_reviews.csv', usecols=['text', 'rating', 'location'])

### We call our API key to start web scraping

In [11]:
api_key = os.getenv('YELP_API_KEY')
# headers = {'Authorization': 'Bearer ' + api_key}
headers = {'Authorization': 'Bearer ' + 'rjj-CYij9D5rBs0P7jLtSGO6u2hBp38A5jMZTGELqjvvojBqfepQac4jprE6LhD84IuHE9UuswLpOL46Ou8IA04Jd4Ije97e5rv5Q4Ifag8tQrMlJirC2-eTUH58ZXYx'}

### We get the businesses' IDs

In [12]:
def get_all_business_data(base_url):
    
    #all_business_ids = []
    list_of_businesses = []

    while True:
        response = requests.get(base_url, headers=headers)
        if response.status_code != 200:
            break  

        data = response.json()
        businesses = data.get('businesses', [])

        # business_info = data.get('businesses')

        if not businesses:
            break  # Break the loop if no more businesses are returned

        for business in businesses:
            #business_id = business.get('id') # 'name', 'price', 'url', 'review_count', 'display_address', 'image_url', 'display_phone', 'categories'
            business_dict = {'restaurant_id': business.get('id'), 'business_name': business.get('name'), 'business_price': business.get('price'), 'business_url': business.get('url'), 'business_review_count': business.get('review_count'), 'business_display_address': business.get('location').get('display_address'), 'business_image_url': business.get('image_url'), 'business_display_phone': business.get('display_phone'), 'business_categories': business.get('categories')}
            list_of_businesses.append(business_dict)
            #if business_id:
                #all_business_ids.append(business_id)

        # Update the offset in the URL for the next request
        if 'offset=' in base_url:
            base_url = base_url.rsplit('offset=', 1)[0] + f'offset={len(list_of_businesses)}'
        else:
            base_url += f'&offset={len(list_of_businesses)}'

        time.sleep(1)  

    return list_of_businesses

### We get the reviews from the business

In [13]:
def get_reviews(business_data, city):
    restaurant_ids = [restaurant['restaurant_id'] for restaurant in business_data]

    business_df = pd.DataFrame()
    count = 0
    
    for restaurant_id in restaurant_ids:
        url2 = "https://api.yelp.com/v3/businesses/" + restaurant_id + "/reviews?sort_by=yelp_sort"
        response = requests.get(url2, headers=headers)
        reviews_data = response.json()
        
        try:
            for review in reviews_data['reviews']:
                business_df.loc[count, 'text'] = review['text']
                business_df.loc[count, 'rating'] = review['rating']
                business_df.loc[count, 'location'] = city
                business_df.loc[count, 'restaurant_id'] = restaurant_id
                count += 1

        except:
            print("No reviews for this restaurant")
        
    return business_df

In [14]:
# cities = ['San+Diego', 'San+Jose', 'Seattle', 'Maryville', 'Salt+Lake+City', 'Oklahoma+City', 'Austin', 'Louisville', 'Indianapolis', 'Portland', 'Santa+Cruz', 'Jacksonville', 'Hudson', 'Dallas', 'Phoenix', 'Asheville']
# 'Memphis', 'Boston', 'Seattle', 'Denver', 'Washington', 'Nashville', 'Baltimore', 'Oklahoma+City', 'Louisville', 'Portland', 'Las+Vegas', 'Milwaukee', 'Albuquerque', 'Tucson', 'Fresno', 'Sacramento', 'Long+Beach', 'Kansas+City', 'Mesa'
# 'Atlanta', 'Raleigh', 'Miami', 'Omaha', 'Oakland', 'Tulsa', 'Minneapolis', 'Cleveland', 'Wichita', 'New+York', 'Los+Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia', 'San+Antonio', 'San+Diego', 'Dallas', 'San+Jose', 'Austin', 'Jacksonville', 'Indianapolis', 'San+Francisco', 'Columbus', 'Fort+Worth', 'Charlotte', 'Detroit', 'El+Paso', 'Arlington', 'New+Orleans', 'Bakersfield', 'Tampa', 'Honolulu', 'Aurora', 'Anaheim', 'Santa+Ana', 'St.+Louis',
# 'Riverside', 'Corpus+Christi', 'Lexington', 'Pittsburgh', 'Anchorage', 'Stockton', 'Cincinnati', 'Saint+Paul', 'Toledo', 'Newark', 'Greensboro', 'Plano', 'Henderson', 'Lincoln', 'Buffalo', 'Fort+Wayne', 'Jersey+City', 'Chula+Vista', 'Orlando', 'St.+Petersburg', 'Norfolk', 'Chandler', 'Laredo', 'Madison', 'Durham', 'Lubbock', 'Winston-Salem', 'Garland', 'Glendale', 'Hialeah', 'Reno',
# 'Baton+Rouge', 'Irvine', 'Chesapeake', 'Irving', 'Scottsdale', 'North+Las+Vegas', 'Fremont', 'Gilbert', 'San+Bernardino', 'Boise', 'Birmingham', 'Rochester', 'Richmond', 'Spokane', 'Des+Moines', 'Montgomery', 'Modesto', 'Fayetteville', 'Tacoma', 'Shreveport', 'Fontana', 'Oxnard', 'Aurora', 'Moreno+Valley', 'Akron', 'Yonkers', 'Columbus', 'Augusta', 'Little+Rock', 'Amarillo', 'Mobile', 'Huntington+Beach', 'Glendale', 'Grand+Rapids', 'Salt+Lake+City', 'Tallahassee', 'Huntsville', 'Worcester', 'Knoxville', 'Grand+Prairie', 'Newport+News', 'Brownsville', 'Santa+Clarita', 'Overland+Park', 'Providence', 'Garden+Grove', 'Chattanooga', 'Oceanside', 'Santa+Rosa', 'Fort+Lauderdale',

cities = [
    'Rancho+Cucamonga', 'Port+St.+Lucie',
    'Ontario', 'Vancouver', 'Tempe', 'Springfield', 'Lancaster', 'Eugene', 'Pembroke+Pines', 'Salem', 'Cape+Coral',
    'Peoria', 'Sioux+Falls', 'Springfield', 'Elk+Grove', 'Rockford', 'Palmdale', 'Corona', 'Salinas', 'Pomona', 'Pasadena',
    'Joliet', 'Paterson', 'Kansas+City', 'Torrance', 'Syracuse', 'Bridgeport', 'Hayward', 'Fort+Collins', 'Escondido',
    'Lakewood', 'Naperville', 'Dayton', 'Hollywood', 'Sunnyvale', 'Alexandria', 'Mesquite', 'Hampton', 'Pasadena',
    'Orange', 'Savannah', 'Cary', 'Fullerton', 'Warren', 'Clarksville', 'McKinney', 'McAllen', 'New+Haven', 'Sterling+Heights',
    'West+Valley+City', 'Columbia', 'Killeen', 'Topeka', 'Thousand+Oaks', 'Cedar+Rapids', 'Olathe', 'Elizabeth', 'Waco',
    'Hartford', 'Visalia', 'Gainesville', 'Simi+Valley', 'Stamford', 'Bellevue', 'Concord', 'Miramar', 'Coral+Springs',
    'Lafayette', 'Charleston', 'Carrollton', 'Roseville', 'Thornton', 'Beaumont', 'Allentown', 'Surprise', 'Evansville',
    'Abilene', 'Frisco', 'Independence', 'Santa+Clara', 'Springfield', 'Vallejo', 'Victorville', 'Athens', 'Peoria',
    'Lansing', 'Ann+Arbor', 'El+Monte', 'Denton', 'Berkeley', 'Provo', 'Downey', 'Midland', 'Norman', 'Waterbury',
    'Costa+Mesa', 'Inglewood', 'Manchester', 'Murfreesboro', 'Columbia', 'Elgin', 'Clearwater', 'Miami+Gardens',
    'Rochester', 'Pueblo', 'Lowell', 'Wilmington', 'Arvada', 'Ventura', 'Westminster', 'West+Covina', 'Gresham',
    'Fargo', 'Norwalk', 'Carlsbad', 'Fairfield', 'Cambridge', 'Wichita+Falls', 'High+Point', 'Billings', 'Green+Bay',
    'West+Jordan', 'Richmond', 'Murrieta', 'Burbank', 'Palm+Bay', 'Everett', 'Flint', 'Antioch', 'Erie', 'South+Bend',
    'Daly+City', 'Centennial', 'Temecula', 'Rialto', 'Thornton', 'El+Paso', 'San+Mateo', 'Midland', 'Davenport', 'Santa+Monica',
    'Sandy+Springs', 'Boulder', 'Hillsboro', 'Frisco', 'Greeley', 'San+Bernardino', 'Jurupa+Valley', 'Kenosha', 'Rochester',
    'Olathe', 'Newport+Beach', 'Topeka', 'Athens', 'Santa+Clarita', 'Simi+Valley', 'Columbia', 'Concord', 'Lafayette',
    'Charleston', 'Carrollton', 'Roseville', 'Thornton'
]

business_df = pd.DataFrame()
business_data = pd.DataFrame()

for city in cities:
    url = (f'https://api.yelp.com/v3/businesses/search?location={city}&term=restaurants&categories=french&price=3&price=4&sort_by=best_match')
    business_id = get_all_business_data(url)
    business_data = pd.concat([business_data, pd.DataFrame(business_id)], ignore_index=True)
    business_df = pd.concat([business_df, get_reviews(business_id, city)], ignore_index=True)

  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
around. Lobster and steak - fabulous, both soups were superb mushroom & the corn bisque w/crab meat-yum,...' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review[

No reviews for this restaurant


  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id


No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant


  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
 Another year, another unforgettable Thanksgiving at Le Stick Nouveau in charming Eureka Springs, Arkansas! 

Course 1: Waldorf salad...' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
Great drinks are provided on the menu as well as great food...' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_d

No reviews for this restaurant



Maddie was our waitress and took great care of us.  She was very attentive and knowledgeable about the menu and...' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id
around. Lobster and steak - fabulous, both soups were superb mushroom & the corn bisque w/crab meat-yum,...' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  business_df.loc[count, 'text'] = review['text']
  business_df.loc[count, 'location'] = city
  business_df.loc[count, 'restaurant_id'] = restaurant_id

The...' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  business_df.loc[count, 'text'] = review[

No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant
No reviews for this restaurant


Il faudra juste faire un merge demain!

In [15]:
business_df['location'].unique()
# 2-Ba1KvwdY7MZQ3CSqB_1b1G8L_yFEHMN3GKT9wJYQD7rcI6GMzwEH1Q9p_fkJ-SdB01Nd63EcOZrMtiEC63V9zLLqRIpwrz7q2ne5mUwZ-utvwdbEbIntIkAdKjZXYx

array(['Baton+Rouge', 'Irvine', 'Chesapeake', 'Irving', 'Scottsdale',
       'North+Las+Vegas', 'Fremont', 'Gilbert', 'San+Bernardino',
       'Birmingham', 'Rochester', 'Richmond', 'Spokane', 'Fayetteville',
       'Tacoma', 'Fontana', 'Oxnard', 'Aurora', 'Moreno+Valley', 'Akron',
       'Yonkers', 'Columbus', 'Mobile', 'Huntington+Beach', 'Glendale',
       'Grand+Rapids', 'Salt+Lake+City', 'Tallahassee', 'Worcester',
       'Knoxville', 'Grand+Prairie', 'Newport+News', 'Santa+Clarita',
       'Overland+Park', 'Providence', 'Garden+Grove', 'Chattanooga',
       'Oceanside', 'Santa+Rosa', 'Fort+Lauderdale'], dtype=object)

In [19]:
business_df.to_csv('df-export-six.csv', index=False)
business_data.to_csv('id-export-six.csv', index=False)

### Merge

In [153]:
ouput_dfs = []

cities = ['new_orleans', 'nyc', 'chicago', 'los_angeles', 'sf', 'philadelphia', 'las_vegas', 'houston', 'phoenix', 'miami']

for city in cities:
    reviews_list = globals()[f'{city}_list_of_reviews']
    ouput_df = pd.DataFrame(reviews_list, columns=['text', 'rating', 'location'])
    ouput_dfs.append(ouput_df)

output = pd.concat(ouput_dfs, ignore_index=True)
df = pd.concat([df, output], ignore_index=True)

In [157]:
df.drop_duplicates(inplace=True)
df['rating'].value_counts()

rating
5    367
4    134
3     69
2     29
1     19
Name: count, dtype: int64

In [105]:
df_data = pd.read_csv('df-export-five.csv')
df_id = pd.read_csv('id-export-five.csv')

In [106]:
df_data

Unnamed: 0,text,rating,location,restaurant_id
0,Wednesday afternoon in the Red Stick and I att...,5.0,Baton+Rouge,HOXNkb-MoWgTPcu7Z4fpSg
1,Visit #1: I was there 8-10-23 while in town mo...,4.0,Baton+Rouge,HOXNkb-MoWgTPcu7Z4fpSg
2,"Great food, excellent service.\nNice ambience,...",5.0,Baton+Rouge,HOXNkb-MoWgTPcu7Z4fpSg
3,Very quaint and intimate dining with linen tab...,5.0,Baton+Rouge,OjF3YRDrByfjAJeCISr0Vg
4,We loves dining at Maison Lacour so much. Such...,5.0,Baton+Rouge,OjF3YRDrByfjAJeCISr0Vg
...,...,...,...,...
1168,"They do fuzzy math when ""honoring"" a Groupon. ...",2.0,Fort+Lauderdale,R9R7ApmWwE9i-FqcyBBh3Q
1169,Parking - located in the Lauderdale By the Sea...,3.0,Fort+Lauderdale,R9R7ApmWwE9i-FqcyBBh3Q
1170,Wow ! Just awesome the zebra pattern chairs th...,5.0,Fort+Lauderdale,-QtB-fGXYKdaEQySVO28wg
1171,I went Wednesday night for the first time to d...,5.0,Fort+Lauderdale,-QtB-fGXYKdaEQySVO28wg


In [109]:
df_id

Unnamed: 0,restaurant_id,business_name,business_price,business_url,business_review_count,business_display_address,business_image_url,business_display_phone,business_categories
0,HOXNkb-MoWgTPcu7Z4fpSg,Mansurs on the Boulevard,$$$,https://www.yelp.com/biz/mansurs-on-the-boulev...,421,"['5720 Corporate Blvd', 'Ste A', 'Baton Rouge,...",https://s3-media4.fl.yelpcdn.com/bphoto/ixufmP...,(225) 923-3366,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ..."
1,OjF3YRDrByfjAJeCISr0Vg,Maison Lacour,$$$$,https://www.yelp.com/biz/maison-lacour-baton-r...,67,"['11025 N Harrells Ferry Rd', 'Baton Rouge, LA...",https://s3-media2.fl.yelpcdn.com/bphoto/Qey5_O...,(225) 275-3755,"[{'alias': 'french', 'title': 'French'}]"
2,FWPpQ1FH4f7AC0imO5kvQA,Latil's Landing Restaurant,$$$$,https://www.yelp.com/biz/latils-landing-restau...,26,"['40136 Hwy 942', 'Darrow, LA 70725']",https://s3-media2.fl.yelpcdn.com/bphoto/12LGNe...,(225) 473-9380,"[{'alias': 'newamerican', 'title': 'New Americ..."
3,aXtZEPaoyp8OangJLfCkLw,French Market Bistro,$$$,https://www.yelp.com/biz/french-market-bistro-...,133,"['16645 Highland Rd', 'Baton Rouge, LA 70810']",https://s3-media3.fl.yelpcdn.com/bphoto/rtweqn...,(225) 753-3500,"[{'alias': 'french', 'title': 'French'}, {'ali..."
4,xQQ0L3zha2-3zehLh889bw,Marché Moderne,$$$$,https://www.yelp.com/biz/march%C3%A9-moderne-n...,522,"['7862 Pacific Coast Hwy', 'Newport Beach, CA ...",https://s3-media1.fl.yelpcdn.com/bphoto/SN9AFV...,(714) 434-7900,"[{'alias': 'french', 'title': 'French'}, {'ali..."
...,...,...,...,...,...,...,...,...,...
416,GRZpClfFf3otLO2ridn5iA,Pascal's on Ponce,$$$$,https://www.yelp.com/biz/pascals-on-ponce-cora...,197,"['2611 Ponce De Leon Blvd', 'Coral Gables, FL ...",https://s3-media3.fl.yelpcdn.com/bphoto/UMdIqa...,(305) 444-2024,"[{'alias': 'french', 'title': 'French'}]"
417,kuOKfv-GFwiYb9qHsZkZgg,Villa Azur,$$$$,https://www.yelp.com/biz/villa-azur-miami-beac...,490,"['309 23rd St', 'Miami Beach, FL 33139']",https://s3-media2.fl.yelpcdn.com/bphoto/JaBWhe...,(305) 763-8688,"[{'alias': 'italian', 'title': 'Italian'}, {'a..."
418,2OfIFh_XjOJFmxlVHcTnCA,Le Rivage,$$$,https://www.yelp.com/biz/le-rivage-boca-raton?...,84,"['450 NE 20th St', 'Ste 103', 'Boca Raton, FL ...",https://s3-media2.fl.yelpcdn.com/bphoto/5TU4DW...,(561) 620-0033,"[{'alias': 'french', 'title': 'French'}, {'ali..."
419,K6RzqmTJ5yaI35Bw1BAE3g,Two Chef's,$$$,https://www.yelp.com/biz/two-chefs-coral-gable...,143,"['8287 S Dixie Hwy', 'Coral Gables, FL 33143']",https://s3-media1.fl.yelpcdn.com/bphoto/GY05B-...,(305) 663-2100,"[{'alias': 'newamerican', 'title': 'New Americ..."


In [108]:
df_id.drop_duplicates(inplace=True)

In [110]:
merged_df = df_data.merge(df_id, on='restaurant_id', how='left')

In [111]:
merged_df

Unnamed: 0,text,rating,location,restaurant_id,business_name,business_price,business_url,business_review_count,business_display_address,business_image_url,business_display_phone,business_categories
0,Wednesday afternoon in the Red Stick and I att...,5.0,Baton+Rouge,HOXNkb-MoWgTPcu7Z4fpSg,Mansurs on the Boulevard,$$$,https://www.yelp.com/biz/mansurs-on-the-boulev...,421,"['5720 Corporate Blvd', 'Ste A', 'Baton Rouge,...",https://s3-media4.fl.yelpcdn.com/bphoto/ixufmP...,(225) 923-3366,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ..."
1,Visit #1: I was there 8-10-23 while in town mo...,4.0,Baton+Rouge,HOXNkb-MoWgTPcu7Z4fpSg,Mansurs on the Boulevard,$$$,https://www.yelp.com/biz/mansurs-on-the-boulev...,421,"['5720 Corporate Blvd', 'Ste A', 'Baton Rouge,...",https://s3-media4.fl.yelpcdn.com/bphoto/ixufmP...,(225) 923-3366,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ..."
2,"Great food, excellent service.\nNice ambience,...",5.0,Baton+Rouge,HOXNkb-MoWgTPcu7Z4fpSg,Mansurs on the Boulevard,$$$,https://www.yelp.com/biz/mansurs-on-the-boulev...,421,"['5720 Corporate Blvd', 'Ste A', 'Baton Rouge,...",https://s3-media4.fl.yelpcdn.com/bphoto/ixufmP...,(225) 923-3366,"[{'alias': 'cajun', 'title': 'Cajun/Creole'}, ..."
3,Very quaint and intimate dining with linen tab...,5.0,Baton+Rouge,OjF3YRDrByfjAJeCISr0Vg,Maison Lacour,$$$$,https://www.yelp.com/biz/maison-lacour-baton-r...,67,"['11025 N Harrells Ferry Rd', 'Baton Rouge, LA...",https://s3-media2.fl.yelpcdn.com/bphoto/Qey5_O...,(225) 275-3755,"[{'alias': 'french', 'title': 'French'}]"
4,We loves dining at Maison Lacour so much. Such...,5.0,Baton+Rouge,OjF3YRDrByfjAJeCISr0Vg,Maison Lacour,$$$$,https://www.yelp.com/biz/maison-lacour-baton-r...,67,"['11025 N Harrells Ferry Rd', 'Baton Rouge, LA...",https://s3-media2.fl.yelpcdn.com/bphoto/Qey5_O...,(225) 275-3755,"[{'alias': 'french', 'title': 'French'}]"
...,...,...,...,...,...,...,...,...,...,...,...,...
1168,"They do fuzzy math when ""honoring"" a Groupon. ...",2.0,Fort+Lauderdale,R9R7ApmWwE9i-FqcyBBh3Q,Frenchy's Table,$$$,https://www.yelp.com/biz/frenchys-table-lauder...,213,"['235 Commercial Blvd', 'Ste 105', 'Lauderdale...",https://s3-media3.fl.yelpcdn.com/bphoto/sTmlhd...,(954) 533-2580,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
1169,Parking - located in the Lauderdale By the Sea...,3.0,Fort+Lauderdale,R9R7ApmWwE9i-FqcyBBh3Q,Frenchy's Table,$$$,https://www.yelp.com/biz/frenchys-table-lauder...,213,"['235 Commercial Blvd', 'Ste 105', 'Lauderdale...",https://s3-media3.fl.yelpcdn.com/bphoto/sTmlhd...,(954) 533-2580,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
1170,Wow ! Just awesome the zebra pattern chairs th...,5.0,Fort+Lauderdale,-QtB-fGXYKdaEQySVO28wg,Dirty French Steakhouse Miami,$$$$,https://www.yelp.com/biz/dirty-french-steakhou...,177,"['1200 Brickell Ave', 'Miami, FL 33131']",https://s3-media3.fl.yelpcdn.com/bphoto/2zlAEJ...,(305) 990-8707,"[{'alias': 'french', 'title': 'French'}, {'ali..."
1171,I went Wednesday night for the first time to d...,5.0,Fort+Lauderdale,-QtB-fGXYKdaEQySVO28wg,Dirty French Steakhouse Miami,$$$$,https://www.yelp.com/biz/dirty-french-steakhou...,177,"['1200 Brickell Ave', 'Miami, FL 33131']",https://s3-media3.fl.yelpcdn.com/bphoto/2zlAEJ...,(305) 990-8707,"[{'alias': 'french', 'title': 'French'}, {'ali..."


## Merging with pre-recorded data

In [112]:
output_df = pd.read_csv('reviews.csv')

In [113]:
output_df

Unnamed: 0,text,rating,location,restaurant_id,business_name,business_price,business_url,business_review_count,business_display_address,business_image_url,business_display_phone,business_categories
0,"Amazing food, service, and ambiance! \n\nMy fr...",5.0,San+Diego,YLV7ZZoyOAXW3eqidGXBZA,The Smoking Goat,$$$,https://www.yelp.com/biz/the-smoking-goat-san-...,1758,"['3408 30th St', 'San Diego, CA 92104']",https://s3-media3.fl.yelpcdn.com/bphoto/GNINLV...,(619) 955-5295,"[{'alias': 'french', 'title': 'French'}, {'ali..."
1,Casual enough place to pop in and get a table ...,4.0,San+Diego,YLV7ZZoyOAXW3eqidGXBZA,The Smoking Goat,$$$,https://www.yelp.com/biz/the-smoking-goat-san-...,1758,"['3408 30th St', 'San Diego, CA 92104']",https://s3-media3.fl.yelpcdn.com/bphoto/GNINLV...,(619) 955-5295,"[{'alias': 'french', 'title': 'French'}, {'ali..."
2,3.5 stars...\n\nArrived for our reservation an...,3.0,San+Diego,YLV7ZZoyOAXW3eqidGXBZA,The Smoking Goat,$$$,https://www.yelp.com/biz/the-smoking-goat-san-...,1758,"['3408 30th St', 'San Diego, CA 92104']",https://s3-media3.fl.yelpcdn.com/bphoto/GNINLV...,(619) 955-5295,"[{'alias': 'french', 'title': 'French'}, {'ali..."
3,"Great brunch experience here at Parc Bistro, b...",5.0,San+Diego,_zFYC91bOjXC9EdsT8VnVQ,Parc Bistro-Brasserie,$$$,https://www.yelp.com/biz/parc-bistro-brasserie...,777,"['2760 5th Ave', 'San Diego, CA 92103']",https://s3-media2.fl.yelpcdn.com/bphoto/nXMP3P...,(619) 795-1501,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
4,Brunch today was borderline disastrous. Thing...,3.0,San+Diego,_zFYC91bOjXC9EdsT8VnVQ,Parc Bistro-Brasserie,$$$,https://www.yelp.com/biz/parc-bistro-brasserie...,777,"['2760 5th Ave', 'San Diego, CA 92103']",https://s3-media2.fl.yelpcdn.com/bphoto/nXMP3P...,(619) 795-1501,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
...,...,...,...,...,...,...,...,...,...,...,...,...
3266,Great meal at Le Bistro. Javier and Paul serve...,5.0,Reno,nnVCFeMRLDDluZtabg9piA,Le Bistro,$$$,https://www.yelp.com/biz/le-bistro-incline-vil...,255,"['120 Country Club Dr', 'Ste 29', 'Incline Vil...",https://s3-media4.fl.yelpcdn.com/bphoto/OvQlUY...,(775) 831-0800,"[{'alias': 'french', 'title': 'French'}]"
3267,"As always, the food is amazing! This has becom...",5.0,Reno,nnVCFeMRLDDluZtabg9piA,Le Bistro,$$$,https://www.yelp.com/biz/le-bistro-incline-vil...,255,"['120 Country Club Dr', 'Ste 29', 'Incline Vil...",https://s3-media4.fl.yelpcdn.com/bphoto/OvQlUY...,(775) 831-0800,"[{'alias': 'french', 'title': 'French'}]"
3268,Gayle's Birthday Dinner - 12/15/23 \n5:30 PM R...,5.0,Reno,GKcaAbGsobMVhjSFktgVsA,Beaujolais Bistro,$$$,https://www.yelp.com/biz/beaujolais-bistro-ren...,326,"['753 Riverside Dr', 'Reno, NV 89503']",https://s3-media3.fl.yelpcdn.com/bphoto/EzDA28...,(775) 323-2227,"[{'alias': 'french', 'title': 'French'}, {'ali..."
3269,"I had high hopes for this place, especially on...",3.0,Reno,GKcaAbGsobMVhjSFktgVsA,Beaujolais Bistro,$$$,https://www.yelp.com/biz/beaujolais-bistro-ren...,326,"['753 Riverside Dr', 'Reno, NV 89503']",https://s3-media3.fl.yelpcdn.com/bphoto/EzDA28...,(775) 323-2227,"[{'alias': 'french', 'title': 'French'}, {'ali..."


In [114]:
output_df = pd.concat([output_df, merged_df], ignore_index=True)

In [115]:
output_df

Unnamed: 0,text,rating,location,restaurant_id,business_name,business_price,business_url,business_review_count,business_display_address,business_image_url,business_display_phone,business_categories
0,"Amazing food, service, and ambiance! \n\nMy fr...",5.0,San+Diego,YLV7ZZoyOAXW3eqidGXBZA,The Smoking Goat,$$$,https://www.yelp.com/biz/the-smoking-goat-san-...,1758,"['3408 30th St', 'San Diego, CA 92104']",https://s3-media3.fl.yelpcdn.com/bphoto/GNINLV...,(619) 955-5295,"[{'alias': 'french', 'title': 'French'}, {'ali..."
1,Casual enough place to pop in and get a table ...,4.0,San+Diego,YLV7ZZoyOAXW3eqidGXBZA,The Smoking Goat,$$$,https://www.yelp.com/biz/the-smoking-goat-san-...,1758,"['3408 30th St', 'San Diego, CA 92104']",https://s3-media3.fl.yelpcdn.com/bphoto/GNINLV...,(619) 955-5295,"[{'alias': 'french', 'title': 'French'}, {'ali..."
2,3.5 stars...\n\nArrived for our reservation an...,3.0,San+Diego,YLV7ZZoyOAXW3eqidGXBZA,The Smoking Goat,$$$,https://www.yelp.com/biz/the-smoking-goat-san-...,1758,"['3408 30th St', 'San Diego, CA 92104']",https://s3-media3.fl.yelpcdn.com/bphoto/GNINLV...,(619) 955-5295,"[{'alias': 'french', 'title': 'French'}, {'ali..."
3,"Great brunch experience here at Parc Bistro, b...",5.0,San+Diego,_zFYC91bOjXC9EdsT8VnVQ,Parc Bistro-Brasserie,$$$,https://www.yelp.com/biz/parc-bistro-brasserie...,777,"['2760 5th Ave', 'San Diego, CA 92103']",https://s3-media2.fl.yelpcdn.com/bphoto/nXMP3P...,(619) 795-1501,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
4,Brunch today was borderline disastrous. Thing...,3.0,San+Diego,_zFYC91bOjXC9EdsT8VnVQ,Parc Bistro-Brasserie,$$$,https://www.yelp.com/biz/parc-bistro-brasserie...,777,"['2760 5th Ave', 'San Diego, CA 92103']",https://s3-media2.fl.yelpcdn.com/bphoto/nXMP3P...,(619) 795-1501,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
...,...,...,...,...,...,...,...,...,...,...,...,...
4439,"They do fuzzy math when ""honoring"" a Groupon. ...",2.0,Fort+Lauderdale,R9R7ApmWwE9i-FqcyBBh3Q,Frenchy's Table,$$$,https://www.yelp.com/biz/frenchys-table-lauder...,213,"['235 Commercial Blvd', 'Ste 105', 'Lauderdale...",https://s3-media3.fl.yelpcdn.com/bphoto/sTmlhd...,(954) 533-2580,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
4440,Parking - located in the Lauderdale By the Sea...,3.0,Fort+Lauderdale,R9R7ApmWwE9i-FqcyBBh3Q,Frenchy's Table,$$$,https://www.yelp.com/biz/frenchys-table-lauder...,213,"['235 Commercial Blvd', 'Ste 105', 'Lauderdale...",https://s3-media3.fl.yelpcdn.com/bphoto/sTmlhd...,(954) 533-2580,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a..."
4441,Wow ! Just awesome the zebra pattern chairs th...,5.0,Fort+Lauderdale,-QtB-fGXYKdaEQySVO28wg,Dirty French Steakhouse Miami,$$$$,https://www.yelp.com/biz/dirty-french-steakhou...,177,"['1200 Brickell Ave', 'Miami, FL 33131']",https://s3-media3.fl.yelpcdn.com/bphoto/2zlAEJ...,(305) 990-8707,"[{'alias': 'french', 'title': 'French'}, {'ali..."
4442,I went Wednesday night for the first time to d...,5.0,Fort+Lauderdale,-QtB-fGXYKdaEQySVO28wg,Dirty French Steakhouse Miami,$$$$,https://www.yelp.com/biz/dirty-french-steakhou...,177,"['1200 Brickell Ave', 'Miami, FL 33131']",https://s3-media3.fl.yelpcdn.com/bphoto/2zlAEJ...,(305) 990-8707,"[{'alias': 'french', 'title': 'French'}, {'ali..."


In [116]:
output_df.to_csv('reviews.csv', index=False)