## Yelp Rating Data Query
* using gfairchild's `yelpapi` implementation of the Yelp Fusion API

### Dependencies

In [1]:
## Import Dependencies
import pandas as pd
from yelpapi import YelpAPI

from config import api_key 

In [2]:
## Yelp API
# Build the shared client used by every query cell below.
# The timeout value is handed to YelpAPI as `timeout_s`
# (presumably seconds per request -- confirm against the yelpapi docs).
YELP_TIMEOUT_S = 2.0
yelp_api = YelpAPI(api_key, timeout_s=YELP_TIMEOUT_S)

### List of Businesses to Search in Yelp

In [3]:
# test data
# Read the sample of inspection records whose businesses are looked up on
# Yelp further down; the frame is shown as the cell output.
inspection_data_path = "../Resources/TestData_Yelp.csv"
df = pd.read_csv(filepath_or_buffer=inspection_data_path)
df

Unnamed: 0,Inspection ID,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
0,2362315,7-ELEVEN,7-ELEVEN,2704169,Restaurant,Risk 2 (Medium),177 N WELLS ST,CHICAGO,IL,60606,2/13/2020,License,Pass,,41.885257,-87.633770,"(41.88525719405359, -87.63376979601067)"
1,2362304,MIDLANE ESPORTS,MIDLANE ESPORTS,2684099,Restaurant,Risk 2 (Medium),2741 N MILWAUKEE AVE,CHICAGO,IL,60647,2/13/2020,License,Fail,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,41.931042,-87.710406,"(41.93104159573519, -87.71040591126865)"
2,2362343,EL MUCHACHO ALEGRE,EL MUCHACHO ALEGRE,2163723,Restaurant,Risk 1 (High),3036 N PULASKI RD,CHICAGO,IL,60641,2/13/2020,Canvass Re-Inspection,Pass,,41.936521,-87.727259,"(41.93652108441832, -87.72725885229374)"
3,2362312,7-ELEVEN,7-ELEVEN,2704166,Restaurant,Risk 2 (Medium),177 N WELLS ST,CHICAGO,IL,60606,2/13/2020,License,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.885257,-87.633770,"(41.88525719405359, -87.63376979601067)"
4,2362297,PORK CHOP,PORK CHOP,2699239,Restaurant,Risk 1 (High),1625 N HALSTED ST,CHICAGO,IL,60614,2/13/2020,License Re-Inspection,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.911704,-87.648147,"(41.9117043820104, -87.64814711684095)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2360016,SARPINO'S PIZZERIA,SARPINO'S PIZZERIA,2098744,Restaurant,Risk 1 (High),1852 N DAMEN AVE,CHICAGO,IL,60647,2/6/2020,Canvass Re-Inspection,Pass,,41.915372,-87.677715,"(41.91537203392244, -87.6777147461087)"
96,2359995,BRAZILIAN BOWL FRESH GRILL,BRAZILIAN BOWL FRESH GRILL,2437092,Restaurant,Risk 1 (High),3200 W LAWRENCE AVE,CHICAGO,IL,60625,2/6/2020,Short Form Complaint,Pass,"55. PHYSICAL FACILITIES INSTALLED, MAINTAINED ...",41.968561,-87.708536,"(41.9685605606405, -87.70853637445603)"
97,2360053,HOST INTERNATIONAL INC,STARBUCKS (T3 H2),34150,Restaurant,Risk 2 (Medium),11601 W TOUHY AVE,CHICAGO,IL,60666,2/6/2020,Canvass,Pass,36. THERMOMETERS PROVIDED & ACCURATE - Comment...,42.008536,-87.914428,"(42.008536400868735, -87.91442843927047)"
98,2359975,EL FOGON DE ELENA RESTAURANT,EL FOGON DE ELENA RESTAURANT,2712646,Restaurant,Risk 1 (High),3149 W LAWRENCE AVE,CHICAGO,IL,60625,2/6/2020,License,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.968351,-87.708017,"(41.968350838801385, -87.70801666203074)"


### Query for Each Record
* get star and price ratings

In [4]:
def fetch_yelp_ratings(name, address, city, state):
    """Look up one business on Yelp and return its id and ratings.

    Runs a business match query to resolve the Yelp business id, then a
    business detail query for the star and price ratings.

    Parameters
    ----------
    name, address, city, state : str
        Passed to the match query as ``name``, ``address1``, ``city`` and
        ``state``; ``country`` is always ``'US'``.

    Returns
    -------
    dict or None
        ``None`` when the match query yields no business. Otherwise a dict
        with keys ``'bus_id'``, ``'cust_rating'`` (``None`` if the detail
        response carries no rating) and ``'price_rating'`` (number of ``'$'``
        characters in the price string, ``None`` if there is no price).
    """
    # get Yelp business ID with business match query
    match = yelp_api.business_match_query(
        name=name,
        address1=address,
        city=city,
        state=state,
        country='US'
    )
    try:
        bus_id = match['businesses'][0]['id']
    except (IndexError, KeyError):
        # Empty result list, or a response without a 'businesses' entry.
        return None

    # Use business id to get price and star ratings with business detail query
    details = yelp_api.business_query(id=bus_id)
    price = details.get('price')
    return {
        'bus_id': bus_id,
        'cust_rating': details.get('rating'),
        # transform from $$ to count
        'price_rating': price.count('$') if price is not None else None,
    }


# Create the result columns up front so they exist even when no business is
# matched (the column selection in a later cell needs all three), and so that
# re-running this cell does not keep values from a previous run.
df['bus_id'] = pd.Series(index=df.index, dtype='object')
df['cust_rating'] = float('nan')
df['price_rating'] = float('nan')

# Several inspection records can share the same business and address; remember
# each lookup so the API is queried only once per distinct business.
yelp_lookups = {}

for index, item in df.iterrows():
    # A missing (NaN) field cannot be title-cased or sent to the match query;
    # skip the record instead of aborting the whole run.
    missing = [col for col in ('DBA Name', 'Address', 'City', 'State')
               if pd.isna(item[col])]
    if missing:
        print(f"Row {index} is missing {', '.join(missing)}; skipped")
        continue

    name = item['DBA Name'].title()
    address = item['Address'].title()
    lookup_key = (name, address, item['City'].title(), item['State'])

    if lookup_key not in yelp_lookups:
        yelp_lookups[lookup_key] = fetch_yelp_ratings(*lookup_key)
    ratings = yelp_lookups[lookup_key]

    if ratings is None:
        print(f"{name} at {address} is not found in Yelp")
        # move on to next business
        continue

    df.loc[index, 'bus_id'] = ratings['bus_id']

    if ratings['cust_rating'] is None:
        print(f"{name} at {address} has no customer rating")
    else:
        df.loc[index, 'cust_rating'] = ratings['cust_rating']

    if ratings['price_rating'] is None:
        print(f"{name} at {address} has no price rating data")
    else:
        df.loc[index, 'price_rating'] = ratings['price_rating']

    print(f"{name} data are stored")

7-Eleven data are stored
Midlane Esports at 2741 N Milwaukee Ave is not found in Yelp
El Muchacho Alegre data are stored
7-Eleven data are stored
Pork Chop at 1625 N Halsted St is not found in Yelp
Midlane Esports at 2741 N Milwaukee Ave is not found in Yelp
Pho Number 1- Vietnamese Cuisine Llc data are stored
Royal Nutriton at 2624 W 59Th St is not found in Yelp
Viva Nutrition Club at 2624 W 59Th St is not found in Yelp
Bartoli'S Pizzeria at 658 N Ashland Ave is not found in Yelp
Ajawaah Sweets data are stored
Stan'S Corner Inc data are stored
Beefy'S data are stored
El Rey De La Costa at 3548 W Lawrence Ave has no price rating data
El Rey De La Costa data are stored
Captain Hooks data are stored
Jean'S Cafe & Restaurant data are stored
Sammy'S Breakfast-Lunch-Dinner at 250 E 103Rd St is not found in Yelp
El Famous Burrito - Rogers Park data are stored
Ziggy'S Side Door Pub & Deli at 6158 W 63Rd St has no price rating data
Ziggy'S Side Door Pub & Deli data are stored
Dunkin Donuts/Bas

In [6]:
# Keep only the business identifiers and the ratings gathered from Yelp,
# then display the trimmed frame.
rating_columns = [
    'DBA Name',
    'License #',
    'bus_id',
    'cust_rating',
    'price_rating',
]
df = df[rating_columns]
df

Unnamed: 0,DBA Name,License #,bus_id,cust_rating,price_rating
0,7-ELEVEN,2704169,eityapX-OgxF6rjjJ0XmKQ,3.0,1.0
1,MIDLANE ESPORTS,2684099,,,
2,EL MUCHACHO ALEGRE,2163723,nw3gqyr5xtZfUDwlnv0vtg,4.0,1.0
3,7-ELEVEN,2704166,eityapX-OgxF6rjjJ0XmKQ,3.0,1.0
4,PORK CHOP,2699239,,,
...,...,...,...,...,...
95,SARPINO'S PIZZERIA,2098744,Xe8rZcPu0YpV2aEF4YsOhQ,2.0,2.0
96,BRAZILIAN BOWL FRESH GRILL,2437092,_-toZK3WpdvKpXYjT8wTCg,4.0,2.0
97,HOST INTERNATIONAL INC,34150,,,
98,EL FOGON DE ELENA RESTAURANT,2712646,,,


In [7]:
# Write the queried ratings next to the input data, without the index column.
queried_data_path = '../Resources/TestData_Yelp_queried.csv'
df.to_csv(queried_data_path, index=False)