# 2.01 - Yelp API - Business ID Search

In [1]:
import os
os.chdir('../../')

In [2]:
from sklearn.externals import joblib
from urllib import urlencode
import lib.yelp_api as yp
import pandas as pd
import requests
import json
import re

### <span style="color:teal"> Load in Group </span>

In [3]:
yp.obtain_bearer_token()

'_EbMBoICeTNLSZw6cbRNgcJHyp7LeR41HXv7d8Gy0QaYLENkbfBgDRhLtsYo4hTTTx6r443x_yOsQ5Qk22DimUZ66srRcrgWiBk4eBn293T2q6kiKFJRP_7FvT7dWHYx'

In [4]:
yelp_df = pd.read_pickle('data/mtch_srch_rslts_pd.pkl')

### <span style="color:teal"> Business ID Requests </span>

In [5]:
bus_ids = yelp_df['bus_id'].values

In [6]:
bus_ids = bus_ids[:1]

In [7]:
failures, successes = yp.yelp_api_bus_id_calls(bus_ids)

In [6]:
joblib.dump(failures, 'data/yelp_api_bus_id_failures.pkl')
joblib.dump(successes, 'data/yelp_api_bus_id_successes.pkl')

['data/yelp_api_bus_id_successes.pkl']

In [7]:
failures = joblib.load('data/yelp_api_bus_id_failures.pkl')

In [8]:
successes = joblib.load('data/yelp_api_bus_id_successes.pkl')

In [8]:
successes

['\n\n\n<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n<html>\n<head>\n    <title>Yelp Captcha</title>\n    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">\n    <meta http-equiv="Pragma" content="no-cache">\n    <meta http-equiv="Cache-Control" content="no-cache">\n    <meta http-equiv="Expires" content="0">\n    <link rel="shortcut icon" href="/favicon.ico" type="image/ico">\n\n<style type="text/css">\n    html, body, div, span, applet, object, iframe,\n    h1, h2, h3, h4, h5, h6, p, blockquote, pre,\n    a, abbr, acronym, address, big, cite, code,\n    del, dfn, em, font, img, ins, kbd, q, s, samp,\n    small, strike, strong, sub, sup, tt, var,\n    dl, dt, dd, ol, ul, li,\n    fieldset, form, label, legend,\n    table, caption, tbody, tfoot, thead, tr, th, td {\n        margin: 0;\n        padding: 0;\n        border: 0;\n        outline: 0;\n        font-weight: inherit;\n        font-style: inherit;\n        fon

In [6]:
# 'my-moms-bakeshop-los-angeles' kept failing--it looks like the url doesn't exist.

In [7]:
# joblib.dump(ratings, 'data/yelp_api_ratings_all.pkl')

In [8]:
ratings = joblib.load('data/yelp_api_ratings_all.pkl')

In [9]:
rating_dict = {}
for key, value in ratings.items():
    value_list = []
    value_list.append(value[0])
    value_list.append(value[1])
    value_set = set(value)
    for v in value_set:
        try:
            value_list.append(v[0])
            value_list.append(v[1])
            value_list.append(v[2])
        except:
            pass
        
        rating_dict[key] = value_list

In [10]:
columns_tuples = [('author_'+str(i), 'post_date_'+str(i), 'author_rating_'+str(i)) for i in range(0,40)]
individual_columns = [col for tup in columns_tuples for col in tup]

In [11]:
columns = ['num_reviews', 'avg_rating'] + individual_columns
reviews_df = pd.DataFrame(rating_dict.values(), 
                          index = rating_dict.keys(), 
                          columns = columns)

In [12]:
reviews_df.head()

Unnamed: 0,num_reviews,avg_rating,author_0,post_date_0,author_rating_0,author_1,post_date_1,author_rating_1,author_2,post_date_2,...,author_rating_36,author_37,post_date_37,author_rating_37,author_38,post_date_38,author_rating_38,author_39,post_date_39,author_rating_39
edibol-los-angeles,311,4.250804,Tanner D.,2015-08-01,5.0,Chelsie N.,2017-01-12,5.0,Felor H.,2017-02-28,...,5.0,Nicol G.,2015-07-28,5.0,Elena H.,2015-07-25,5.0,Jackie S.,2015-07-19,2.0
thai-dishes-los-angeles,307,3.361564,Fara A.,2008-02-23,3.0,Kevin S.,2017-02-28,5.0,Robynn C.,2017-02-12,...,1.0,Valarie S.,2017-02-10,5.0,Maria V.,2017-02-28,3.0,Seeso C.,2008-06-30,5.0
brew-you-los-angeles,54,3.796296,Jaimie L.,2011-07-03,4.0,Jane L.,2014-04-12,5.0,Peter W.,2013-02-15,...,1.0,John F.,2015-03-22,4.0,Mark C.,2013-04-25,4.0,Ibrahim C.,2013-01-11,5.0
lins-chinese-cuisine-los-angeles,74,2.945946,Raechel R.,2010-04-20,4.0,Queue H.,2015-08-19,4.0,J. W.,2007-06-18,...,5.0,Rosa C.,2007-12-18,1.0,Kelly H.,2010-10-10,2.0,Holly M.,2015-08-08,3.0
little-bear-los-angeles,620,3.643548,Ellie B.,2017-01-08,2.0,Alyssa G.,2017-02-20,5.0,Leona V.,2017-01-17,...,4.0,Hemang S.,2012-01-06,5.0,Vanessa S.,2016-11-19,4.0,Foster K.,2012-01-11,5.0


In [13]:
yelp_data_reviews_df = pd.merge(yelp_df, reviews_df, 
                                left_index=True, 
                                right_index=True)

In [14]:
yelp_data_reviews_df.head()

Unnamed: 0,search_name,search_address,bus_id,name,price,cat_1,cat_2,closed,address,city,...,author_rating_36,author_37,post_date_37,author_rating_37,author_38,post_date_38,author_rating_38,author_39,post_date_39,author_rating_39
101-asian-kitchen-los-angeles,WOK MASTER,7170 BEVERLY BLVD,101-asian-kitchen-los-angeles,101 Asian Kitchen,$$,chinese,sushi,False,7170 Beverly Blvd,Los Angeles,...,2.0,,,,,,,,,
10k-cho-man-won-los-angeles-2,CHO MAN WON,2881 W OLYMPIC BLVD,10k-cho-man-won-los-angeles-2,10K+ Cho Man Won,$$,chinese,noodles,False,2881 W Olympic Blvd,Los Angeles,...,,,,,,,,,,
1739-public-house-los-angeles-2,PUBLIC HOUSE BOLA ELECTRIC LOTUS,1739 N VERMONT AVE,1739-public-house-los-angeles-2,1739 Public House,$$,tradamerican,pubs,False,1739 N Vermont Ave,Los Angeles,...,2.0,Denise L.,2017-01-16,3.0,Stacy D.,2009-01-25,2.0,Mr. B.,2017-03-04,4.0
1810-argentinean-restaurant-los-angeles-2,1810 RESTAURANT,105 W 9TH ST,1810-argentinean-restaurant-los-angeles-2,1810 Argentinean Restaurant,$$,argentine,argentine,False,105 W 9th St,Los Angeles,...,4.0,Dennis V.,2016-12-11,4.0,Bubby O.,2014-08-19,3.0,Marlene G.,2014-08-01,4.0
1st-wok-los-angeles,1 WOK,5565 W MANCHESTER AVE,1st-wok-los-angeles,1st Wok,$,chinese,chinese,False,5565 W Manchester Ave,Los Angeles,...,4.0,Maria M.,2014-04-03,5.0,Reese M.,2013-01-30,3.0,Dan G.,2011-08-16,3.0


In [15]:
pd.to_pickle(yelp_data_reviews_df, 'data/yelp_data_reviews.pkl')