# 2.00 - Yelp API Search

In [1]:
import os
# Move the working directory up two levels so the relative `data/...` paths and the
# `lib` package import used below resolve (presumably the project root -- confirm).
# NOTE: not idempotent -- re-running this cell climbs another two levels.
os.chdir('../../')

In [2]:
from sklearn.externals import joblib
from urllib import urlencode, quote
import lib.yelp_api as yp
import pandas as pd
import requests
import json

## <span style="color:teal"> Load in Data for Yelp Search </span>

In [3]:
# Load the pickled restaurant DataFrame that drives the Yelp searches below.
# NOTE: joblib.load unpickles the file; only load pickles from a trusted source.
rest_df = joblib.load('data/la_rest_group_df.pkl')

In [4]:
# Peek at one row to see the columns available to the search builders.
# NOTE(review): the sample row's phone is '+1nan' -- looks like a missing number that was
# string-prefixed with '+1'; confirm the phone search treats such values as "no phone".
rest_df.head(1)

Unnamed: 0,facility,address,city,zip_cd,num_at_address,max_seats,phone,risk_level,last_inspect_date,inspect_score,cat,closed_est,year
0,#1 CAFE,2080 CENTURY PARK E,LOS ANGELES,90067,1,30,+1nan,3,2016-08-18,90.0,,0,2016


## <span style="color:teal"> Yelp API Request </span>

### <span style="color:teal"> Phone Requests </span>

In [5]:
# Accumulator handed to the yp.* helpers below; presumably they append the summary
# messages that are displayed when `process_list` is shown later in the notebook.
process_list = []

In [6]:
# Build the phone-based searches from rest_df. The helper returns two values here:
# the (presumably updated) process_list and the list of phone searches.
process_list, phone_search_list = yp.create_searches(rest_df, 'phone', process_list)

In [7]:
# Disabled live Yelp API call for the phone searches; `phone_search_dict` is instead
# loaded with yp.load_search_dict a couple of cells below.
# phone_search_dict, unsearchbl_phones = yp.yelp_api_calls(phone_search_list, 'phone')

In [8]:
# Disabled follow-up to the live API call above: it consumes the freshly fetched
# `phone_search_dict`, so it only makes sense when that call is re-enabled.
# yelp_phone_search_fail = yp.find_failed_searches(phone_search_dict, 'phone', 
#                                                  process_list, 'x')

In [9]:
# Load the phone search results through yp.load_search_dict instead of re-running the API call.
# NOTE(review): 'x' is an opaque tag -- the same value is passed to find_failed_searches in the
# commented-out cell above; confirm what it identifies (a stored run/version, presumably).
phone_search_dict = yp.load_search_dict('x', 'phone')

In [10]:
# Convert the loaded search dict into the list form consumed by the matching step below.
phone_format_list = yp.format_search_dict(phone_search_dict)

In [11]:
# Select the matching Yelp result(s) per search. The helper returns the updated list plus a
# second value (`unknown_phone`) that is not used again in this notebook.
phone_format_list, unknown_phone = yp.pick_correct_matches(phone_format_list, 'phone', process_list)

In [12]:
# Build the DataFrame of phone-search matches and preview the first five rows ordered by the
# searched restaurant name. A single search_name can appear on several rows (see the output).
phone_search_df = yp.create_dataframe_of_matches(phone_format_list, 'phone', process_list)
phone_search_df.sort_values('search_name').head(5)

Unnamed: 0,search_name,search_address,match,bus_id,name,price,cat_1,cat_2,closed,address,city,zip_code,latitude,longitude
0,#2 MOON BBQ,478 N WESTERN AVE LOS ANGELES 90004,1.666667,12-moon-b-b-q-all-you-can-eat-los-angeles,#12 Moon B.B.Q All You Can Eat,,restaurants,food,False,478 N Western Ave,Los Angeles,90004,34.083132,-118.311106
1,#2 MOON BBQ,478 N WESTERN AVE LOS ANGELES 90004,1.666667,moon-daepo-bbq-2-los-angeles,Moon Daepo BBQ 2,$$,korean,bbq,False,478 N Western Ave,Los Angeles,90004,34.079609,-118.308664
2,*CUORE DELL AMANTE,123 9TH ST LOS ANGELES 90015,1.666667,cuore-dell-amante-los-angeles-3,Cuore Dell Amante,$$,italian,pizza,False,123 E 9th St,Los Angeles,90015,34.04117,-118.25416
3,1 WOK,5565 W MANCHESTER AVE LOS ANGELES 90045,2.0,1st-wok-los-angeles,1st Wok,$,chinese,chinese,False,5565 W Manchester Ave,Los Angeles,90045,33.960025,-118.378183
4,10880 WILSHIRE TRIMANA,10880 WILSHIRE BLVD LOS ANGELES 90024,1.333333,trimana-los-angeles-17,Trimana,$,sandwiches,sandwiches,False,10880 Wilshire Blvd,Los Angeles,90024,34.058468,-118.442909


### <span style="color:teal"> Term/Location Requests </span>

In [13]:
# Build the term/location searches, passing the phone matches as optional_data
# (presumably so restaurants already matched by phone are skipped -- confirm in lib.yelp_api).
# NOTE(review): the 'phone' call above unpacks two return values (process_list, searches);
# here the whole return value is bound to one name -- confirm the return shape for 'term_loc'.
term_loc_searches = yp.create_searches(rest_df, 'term_loc', 
                                       process_list, optional_data=phone_search_df)

In [14]:
# Disabled live Yelp API call for the term/location searches; `term_loc_search_dict` is
# instead loaded with yp.load_search_dict a couple of cells below.
# term_loc_search_dict, unsearchable_term_loc = yp.yelp_api_calls(term_loc_searches, 'term_loc')

In [15]:
# Disabled follow-up to the live API call above: it consumes the freshly fetched
# `term_loc_search_dict`, so it only makes sense when that call is re-enabled.
# term_loc_search_fail = yp.find_failed_searches(term_loc_search_dict, 'term_loc', 
#                                                process_list, 'x')

In [16]:
# Load the term/location search results through yp.load_search_dict instead of re-running
# the API call ('x' is the same opaque tag used for the phone results).
term_loc_search_dict = yp.load_search_dict('x', 'term_loc')

In [17]:
# Convert the loaded search dict into the list form consumed by the matching step below.
term_loc_format_list = yp.format_search_dict(term_loc_search_dict)

In [18]:
# Replace the in-memory process_list with a previously pickled copy.
# NOTE(review): nothing visible in this notebook writes 'data/process_list.pkl', so this cell
# depends on state from an earlier run/session and discards the entries collected above --
# confirm where the pickle is produced before relying on Restart & Run All.
process_list = joblib.load('data/process_list.pkl')

In [19]:
# Select the matching Yelp result(s) per term/location search. The second return value
# (`unknown`) is not used again in this notebook.
term_loc_format_list, unknown = yp.pick_correct_matches(term_loc_format_list, 'term_loc', process_list)

In [20]:
# Build the DataFrame of term/location matches (same columns as phone_search_df, per the
# output below) and preview the first five rows ordered by the searched restaurant name.
term_loc_search_df = yp.create_dataframe_of_matches(term_loc_format_list, 'term_loc', process_list)
term_loc_search_df.sort_values('search_name').head(5)

Unnamed: 0,search_name,search_address,match,bus_id,name,price,cat_1,cat_2,closed,address,city,zip_code,latitude,longitude
0,#1 CAFE,2080 CENTURY PARK E LOS ANGELES 90067,1.5,one-cafe-los-angeles-2,One Cafe,$$,cafes,cafes,False,2080 Century Park E,Los Angeles,90067,34.058765,-118.412033
1,10 - E,811 W 7TH ST LOS ANGELES 90017,2.0,10e-los-angeles-6,10e,$$,mediterranean,armenian,False,811 W 7th St,Los Angeles,90017,34.048981,-118.258957
2,101 ASIAN KITCHEN,7170 BEVERLY BLVD LOS ANGELES 90036,2.0,101-asian-kitchen-los-angeles,101 Asian Kitchen,$$,chinese,sushi,False,7170 Beverly Blvd,Los Angeles,90036,34.07588,-118.34573
3,101 COFFEE SHOP,6145 FRANKLIN AVE LOS ANGELES 90068,2.0,101-coffee-shop-los-angeles,101 Coffee Shop,$$,diners,tradamerican,False,6145 Franklin Ave,Los Angeles,90028,34.105375,-118.323746
4,1642,1642 W TEMPLE ST LOS ANGELES 90026,2.0,1642-beer-and-wine-los-angeles,1642 Beer and Wine,$$,bars,bars,False,1642 W Temple St,Los Angeles,90026,34.068254,-118.26235


In [21]:
# Show the accumulated summary messages.
# NOTE(review): the output lists "successful matches from term/location searches" twice with
# different counts, which suggests entries were appended over more than one run -- confirm.
process_list

[['The total Yelp search is 9090 restaurants'],
 ['4613 restaurants provided a phone number (50.75%)'],
 ['2225 successful matches from phone searches (24.48%)'],
 ['6865 remaining restaurants to search with term/location(75.52%)'],
 ['6337 restaurant term/location searches yielded a response from yelp (69.71%)'],
 ['3702 successful matches from term/location searches (40.73%)'],
 ['4144 successful matches from term/location searches (45.59%)'],
 ['6369 successful matches from all searches (70.07%)']]

## <span style="color:teal"> Concatenate DataFrames </span>

In [22]:
# Stack the term/location matches on top of the phone matches (both frames share the same
# columns). ignore_index=True gives the combined frame a fresh 0..n-1 index; without it the
# two source indexes are kept and their labels collide, which makes label-based access
# (.loc, index alignment) ambiguous further down.
all_srch_rslts = pd.concat([term_loc_search_df, phone_search_df], ignore_index=True)
all_srch_rslts.head()

Unnamed: 0,search_name,search_address,match,bus_id,name,price,cat_1,cat_2,closed,address,city,zip_code,latitude,longitude
0,#1 CAFE,2080 CENTURY PARK E LOS ANGELES 90067,1.5,one-cafe-los-angeles-2,One Cafe,$$,cafes,cafes,False,2080 Century Park E,Los Angeles,90067,34.058765,-118.412033
1,10 - E,811 W 7TH ST LOS ANGELES 90017,2.0,10e-los-angeles-6,10e,$$,mediterranean,armenian,False,811 W 7th St,Los Angeles,90017,34.048981,-118.258957
2,101 ASIAN KITCHEN,7170 BEVERLY BLVD LOS ANGELES 90036,2.0,101-asian-kitchen-los-angeles,101 Asian Kitchen,$$,chinese,sushi,False,7170 Beverly Blvd,Los Angeles,90036,34.07588,-118.34573
3,101 COFFEE SHOP,6145 FRANKLIN AVE LOS ANGELES 90068,2.0,101-coffee-shop-los-angeles,101 Coffee Shop,$$,diners,tradamerican,False,6145 Franklin Ave,Los Angeles,90028,34.105375,-118.323746
4,1642,1642 W TEMPLE ST LOS ANGELES 90026,2.0,1642-beer-and-wine-los-angeles,1642 Beer and Wine,$$,bars,bars,False,1642 W Temple St,Los Angeles,90026,34.068254,-118.26235


## <span style="color:teal"> Read in Yelp Categories </span>

In [23]:
# Load the Yelp category table; its `alias` and `parents` fields are used in the next cells.
categories = pd.read_json('data/categories.json')

In [24]:
# Pull the alias and parents columns out as arrays, then pair them up row by row.
cat_alias, cat_prnts = categories['alias'].values, categories['parents'].values
cat_list = zip(cat_alias, cat_prnts)

In [25]:
# Map each category alias to its list of parent aliases, leaving out every category whose
# `parents` list is empty.
# Built with a comprehension so the loop no longer rebinds the notebook-level `cat_alias`
# array (defined in the previous cell) to a single string or leaks loop variables.
cat_dict = {alias: parents for alias, parents in cat_list if parents != []}

In [26]:
# Keep only the categories that list 'restaurants' among their parents.
rest_food_dict = {
    alias: parents
    for alias, parents in cat_dict.items()
    if 'restaurants' in parents
}

In [27]:
# Hand-picked category aliases that should also count as food businesses.
relevant_food_cat = [
    'coffee', 'delicatessen', 'bagels', 'milkshakebars', 'poke', 'shavedice',
    'bento', 'tea', 'bakeries', 'donuts', 'gourmet', 'desserts', 'cupcakes',
    'churros', 'empanadas', 'smokehouse', 'pretzels', 'acaibowls', 'icecream',
    'gelato', 'ramen',
]

# Register each one with a ['food'] parent list (a fresh list per alias); an alias that is
# already present in rest_food_dict is overwritten.
rest_food_dict.update((alias, ['food']) for alias in relevant_food_cat)

## <span style="color:teal"> Trim DataFrame to Only Represent Restaurants and Food </span>

In [28]:
def filter_food_rest(x, lookup=None):
    """Flag whether a category alias belongs to the restaurant/food set.

    Parameters
    ----------
    x : hashable
        Category alias to test (a value from the ``cat_1`` / ``cat_2`` columns).
    lookup : container, optional
        Collection of accepted aliases. When omitted, the notebook-level
        ``rest_food_dict`` is used, so ``Series.apply(filter_food_rest)``
        behaves exactly as before.

    Returns
    -------
    int
        1 if ``x`` is in ``lookup``, otherwise 0.
    """
    if lookup is None:
        # Resolved at call time so later additions to rest_food_dict are honoured.
        lookup = rest_food_dict
    return int(x in lookup)

# 1/0 flag per category slot: is the alias one of the restaurant/food categories?
first_cat_flag = all_srch_rslts['cat_1'].apply(filter_food_rest)
second_cat_flag = all_srch_rslts['cat_2'].apply(filter_food_rest)

all_srch_rslts['rst_fd_1'] = first_cat_flag
all_srch_rslts['rst_fd_2'] = second_cat_flag
# Count (0-2) of category slots that are restaurant/food related.
all_srch_rslts['rst_fd'] = all_srch_rslts['rst_fd_1'] + all_srch_rslts['rst_fd_2']

In [29]:
# Keep rows whose `match` value exceeds 1 and that have at least one restaurant/food category.
match_above_one = all_srch_rslts['match'] > 1
has_food_category = all_srch_rslts['rst_fd'] > 0
mtch_srch_rslts = all_srch_rslts[match_above_one & has_food_category]

In [30]:
# Columns carried forward; the `match` score and the rst_fd* helper flags are left out.
output_columns = [
    'search_name', 'search_address', 'bus_id', 'name', 'price',
    'cat_1', 'cat_2', 'closed', 'address', 'city', 'zip_code',
    'latitude', 'longitude',
]
mtch_srch_rslts = mtch_srch_rslts[output_columns]

In [31]:
# Check row count, dtypes and nulls of the trimmed frame
# (in the output below only `price` has missing values).
mtch_srch_rslts.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5892 entries, 0 to 2460
Data columns (total 13 columns):
search_name       5892 non-null object
search_address    5892 non-null object
bus_id            5892 non-null object
name              5892 non-null object
price             5693 non-null object
cat_1             5892 non-null object
cat_2             5892 non-null object
closed            5892 non-null bool
address           5892 non-null object
city              5892 non-null object
zip_code          5892 non-null object
latitude          5892 non-null float64
longitude         5892 non-null float64
dtypes: bool(1), float64(2), object(10)
memory usage: 604.2+ KB


In [32]:
# Fraction of matched rows whose `closed` flag is True. Series.mean() on the boolean column
# gives the same ratio as sum/len, and returns NaN instead of raising when the frame is empty.
mtch_srch_rslts['closed'].mean()

0.10675492192803802

In [33]:
# Preview the filtered matches that are written to disk in the next cell.
mtch_srch_rslts.head()

Unnamed: 0,search_name,search_address,bus_id,name,price,cat_1,cat_2,closed,address,city,zip_code,latitude,longitude
0,#1 CAFE,2080 CENTURY PARK E LOS ANGELES 90067,one-cafe-los-angeles-2,One Cafe,$$,cafes,cafes,False,2080 Century Park E,Los Angeles,90067,34.058765,-118.412033
1,10 - E,811 W 7TH ST LOS ANGELES 90017,10e-los-angeles-6,10e,$$,mediterranean,armenian,False,811 W 7th St,Los Angeles,90017,34.048981,-118.258957
2,101 ASIAN KITCHEN,7170 BEVERLY BLVD LOS ANGELES 90036,101-asian-kitchen-los-angeles,101 Asian Kitchen,$$,chinese,sushi,False,7170 Beverly Blvd,Los Angeles,90036,34.07588,-118.34573
3,101 COFFEE SHOP,6145 FRANKLIN AVE LOS ANGELES 90068,101-coffee-shop-los-angeles,101 Coffee Shop,$$,diners,tradamerican,False,6145 Franklin Ave,Los Angeles,90028,34.105375,-118.323746
5,1880 CAFE,1880 CENTURY PARK E LOS ANGELES 90067,1880-cafe-los-angeles,1880 Cafe,$,newamerican,sandwiches,False,1880 Century Park E,Los Angeles,90067,34.06184,-118.414243


In [34]:
# Persist the filtered matches as a pickle under data/.
mtch_srch_rslts.to_pickle('data/mtch_srch_rslts_pd.pkl')

## <span style="color:teal"> Inspect Yelp Search Failures </span>

In [35]:
# Re-run the term/location search builder with the final matches as optional_data --
# presumably this leaves only the restaurants that were never matched (confirm in lib.yelp_api).
# NOTE(review): process_list is passed in again; if create_searches appends to it, this cell
# adds further entries to the log on every run.
failures = yp.create_searches(rest_df, 'term_loc', 
                              process_list, optional_data=mtch_srch_rslts)

In [36]:
# One-element sample (position 40) of the unmatched searches; the position is arbitrary.
# NOTE(review): if `failures` is a one-shot iterator, list(failures) exhausts it here and the
# next cell's list(failures) would be empty -- confirm it is a list/tuple.
failure_1 = list(failures)[40:41]

In [37]:
# Second one-element sample (position 5) of the unmatched searches; the position is arbitrary.
failure_2 = list(failures)[5:6]

In [39]:
# Issue a Yelp API request for the first sampled failure in order to inspect the response.
# NOTE: in the commented-out cells earlier the same helper's result is unpacked into two
# values, so call_1 presumably holds both of them.
call_1 = yp.yelp_api_calls(failure_1, 'term_loc')

In [40]:
# Issue a Yelp API request for the second sampled failure in order to inspect the response.
call_2 = yp.yelp_api_calls(failure_2, 'term_loc')