# Utiliser l'API Yelp pour obtenir des données

## Librairies

In [109]:
import argparse
import json
import pprint
import requests
import sys
import urllib
import pandas as pd
from urllib.error import HTTPError
from urllib.parse import quote
from urllib.parse import urlencode
from config import API_KEY, client_id # Nos identifiants pour l'API Yelp Fusion
from IPython.display import Image
from IPython.core.display import HTML
import os
from os.path import exists

## Constantes

In [3]:
# Yelp Fusion endpoint: base host plus the two resource paths used below.
API_HOST = "https://api.yelp.com"
SEARCH_PATH = "/v3/businesses/search"
BUSINESS_PATH = "/v3/businesses/"  # a business id is appended to this prefix

# Default search term and location, and the page size sent as `limit`.
DEFAULT_TERM = "bars"
DEFAULT_LOCATION = "Toulouse"
SEARCH_LIMIT = 50

# CSV files receiving the collected photo URLs and reviews.
FILENAME_PHOTOS = 'yelp_photos.csv'
FILENAME_REVIEWS = 'yelp_reviews.csv'

## Fonctions

Ces fonctions nous serviront à interroger l'API de Yelp, puis à enregistrer les reviews et les URL des photos respectivement dans les fichiers `FILENAME_REVIEWS` et `FILENAME_PHOTOS`.

In [79]:
def request(host, path, api_key, url_params=None):
    """Send an authenticated GET request to the API and decode the JSON reply.

    The requested URL is printed before the call is made.

    Args:
        host (str): Scheme and domain of the API.
        path (str): Resource path; it is percent-quoted and appended to the
            host.
        api_key (str): API key, sent as a Bearer token in the Authorization
            header.
        url_params (dict): Optional query parameters; an empty dict is used
            when the argument is omitted or falsy.

    Returns:
        The value produced by ``response.json()``.

    Note:
        No HTTP status check is made here: the decoded body is returned
        as is.
    """
    query = url_params if url_params else {}
    quoted_path = quote(path.encode('utf8'))
    url = f'{host}{quoted_path}'
    auth_headers = {'Authorization': f'Bearer {api_key}'}

    print(f'Querying {url} ...')

    reply = requests.request('GET', url, headers=auth_headers, params=query)
    return reply.json()

def request_reviews(host, path, api_key, url_params=None):
    """Send an authenticated GET request and decode the JSON reply, silently.

    Does the same work as ``request`` but prints nothing.

    Args:
        host (str): Scheme and domain of the API.
        path (str): Resource path; it is percent-quoted and appended to the
            host.
        api_key (str): API key, sent as a Bearer token in the Authorization
            header.
        url_params (dict): Optional query parameters; an empty dict is used
            when the argument is omitted or falsy.

    Returns:
        The value produced by ``response.json()``.
    """
    query = url_params if url_params else {}
    quoted_path = quote(path.encode('utf8'))
    auth_headers = {'Authorization': f'Bearer {api_key}'}

    reply = requests.request(
        'GET',
        f'{host}{quoted_path}',
        headers=auth_headers,
        params=query,
    )
    return reply.json()

def search(api_key, term, location, offset=0):
    """Query the business search endpoint for a term at a location.

    Args:
        api_key (str): API key forwarded to ``request``.
        term (str): Search term sent to the API.
        location (str): Search location sent to the API.
        offset (int): Value sent as the ``offset`` query parameter; callers
            in this file use it to page through the results. Defaults to 0.

    Returns:
        dict: The JSON response of the request.
    """
    # Term and location are passed through untouched: ``request`` hands the
    # dict to the HTTP library as query parameters, which form-encodes the
    # values itself. Replacing spaces with '+' beforehand would make the API
    # receive a literal plus sign ('%2B') instead of a space.
    url_params = {
        'term': term,
        'location': location,
        'limit': SEARCH_LIMIT,
        'offset': offset,
    }

    return request(API_HOST, SEARCH_PATH, api_key, url_params=url_params)


def get_business(api_key, business_id):
    """Fetch one business from the Business endpoint.

    Args:
        api_key (str): API key forwarded to ``request``.
        business_id (str): Id of the business; appended to BUSINESS_PATH to
            form the resource path.

    Returns:
        dict: The JSON response of the request.
    """
    return request(API_HOST, BUSINESS_PATH + business_id, api_key)


def get_reviews(api_key, business_id):
    """Fetch the reviews of one business from the Business endpoint.

    Args:
        api_key (str): API key forwarded to ``request_reviews``.
        business_id (str): Id of the business whose reviews are requested.

    Returns:
        dict: The JSON response of the request.
    """
    # Resource path: <BUSINESS_PATH><business_id>/reviews
    reviews_path = BUSINESS_PATH + business_id + '/reviews'
    return request_reviews(API_HOST, reviews_path, api_key)

def query_api(term, location):
    """Search businesses and save their photo URLs and reviews as CSV files.

    For every business on the first page of search results, the business
    details and its reviews are fetched. Photo URLs are written to
    ``FILENAME_PHOTOS`` and review texts with their ratings to
    ``FILENAME_REVIEWS``. Nothing is written when the search returns no
    business.

    Args:
        term (str): Search terms.
        location (str): Location of the businesses to look for.
    """
    response = search(API_KEY, term, location)

    # A response without a 'businesses' key would make len() fail on None;
    # treat it as an empty result instead.
    businesses = response.get('businesses') or []

    print('Nombre de restaurants :', len(businesses))

    if not businesses:
        print(u'Aucun restaurant trouvé pour {0} dans {1}.'.format(term, location))
        return

    photo_rows = []   # (business_id, photo url)
    review_rows = []  # (business_id, review text, rating)
    for business in businesses:
        business_id = business['id']

        # Photo URLs come from the business details.
        details = get_business(API_KEY, business_id)
        if "photos" in details:
            photo_rows.extend((business_id, photo) for photo in details['photos'])
        else:
            print("Aucune photo pour {}".format(business_id))

        # Reviews come from the dedicated reviews resource.
        reviews_response = get_reviews(API_KEY, business_id)
        if "reviews" in reviews_response:
            review_rows.extend(
                (business_id, review['text'], review['rating'])
                for review in reviews_response['reviews']
            )
        else:
            print("Pas de reviews pour {}".format(business_id))

    # Explicit column names keep the CSV header even when nothing was found.
    df_reviews = pd.DataFrame(review_rows, columns=['business_id', 'reviews', 'stars'])
    df_photos = pd.DataFrame(photo_rows, columns=['business_id', 'photos'])

    df_reviews.to_csv(FILENAME_REVIEWS, index=False)
    df_photos.to_csv(FILENAME_PHOTOS, index=False)


def collect_reviews(
        API_KEY,
        search_term,
        location,
        nb_businesses=200,
        filename=FILENAME_REVIEWS):
    """Collect reviews of businesses matching a search and save them to CSV.

    Search results are fetched page by page (SEARCH_LIMIT businesses per
    page) until ``nb_businesses`` businesses have been covered or a page
    comes back empty. The reviews returned for each business are gathered,
    written to ``filename`` and returned.

    Args:
        API_KEY (str): Your API key.
        search_term (str): Search terms.
        location (str): Where to search.
        nb_businesses (int): Number of businesses whose reviews are collected.
        filename (str): CSV file the reviews are saved to.

    Returns:
        pandas.DataFrame: The collected reviews, one row per review
        (review_id, user_id, business_id, stars, text).
    """
    offset = 0
    reviews = []
    while offset < nb_businesses:
        yelp_res = search(API_KEY, search_term, location, offset)
        # Tolerate a response without a 'businesses' key, and keep no more
        # businesses than are still needed to reach nb_businesses.
        businesses = (yelp_res.get("businesses") or [])[:nb_businesses - offset]
        print("Collecte des reviews de {} entreprises...".format(len(businesses)))
        if not businesses:
            # Nothing on this page: further pages would only waste requests.
            break
        for business in businesses:
            revs = get_reviews(API_KEY, business["id"])
            if "reviews" not in revs:
                continue
            for rev in revs["reviews"]:
                reviews.append({
                    "review_id": rev["id"],
                    "user_id": rev["user"]["id"],
                    "business_id": business["id"],
                    "stars": rev["rating"],
                    "text": rev["text"],
                })
        print("{} reviews collectées".format(len(reviews)))
        offset += SEARCH_LIMIT
    print("Enregistrement de {} reviews dans {}...".format(len(reviews), filename))
    df_reviews = pd.DataFrame(reviews)
    df_reviews.to_csv(filename, index=False)
    # Check the very path that was written (also valid for absolute paths).
    if exists(filename):
        print("enregistré avec succès !")
    return df_reviews

## Interroger l'API

### Collecter les reviews

In [112]:
def collect_reviews(
        API_KEY,
        search_term,
        location,
        nb_businesses=200,
        filename=FILENAME_REVIEWS,
        filetype="dataframe"):
    """Collect reviews of businesses matching a search and save them to CSV.

    Search results are fetched page by page (SEARCH_LIMIT businesses per
    page) until ``nb_businesses`` businesses have been covered or a page
    comes back empty. The reviews returned for each business are gathered,
    written to ``filename`` and returned.

    Args:
        API_KEY (str): Your API key.
        search_term (str): Search terms.
        location (str): Where to search.
        nb_businesses (int): Number of businesses whose reviews are collected.
        filename (str): CSV file the reviews are saved to.
        filetype (str): Accepted for compatibility; not used by this function.

    Returns:
        pandas.DataFrame: The collected reviews, one row per review
        (review_id, user_id, business_id, stars, text).
    """
    offset = 0
    reviews = []
    while offset < nb_businesses:
        yelp_res = search(API_KEY, search_term, location, offset)
        # Tolerate a response without a 'businesses' key, and keep no more
        # businesses than are still needed to reach nb_businesses.
        businesses = (yelp_res.get("businesses") or [])[:nb_businesses - offset]
        print("Collecte des reviews de {} entreprises...".format(len(businesses)))
        if not businesses:
            # Nothing on this page: further pages would only waste requests.
            break
        for business in businesses:
            revs = get_reviews(API_KEY, business["id"])
            if "reviews" not in revs:
                continue
            for rev in revs["reviews"]:
                reviews.append({
                    "review_id": rev["id"],
                    "user_id": rev["user"]["id"],
                    "business_id": business["id"],
                    "stars": rev["rating"],
                    "text": rev["text"],
                })
        print("{} reviews collectées".format(len(reviews)))
        offset += SEARCH_LIMIT
    print("Enregistrement de {} reviews dans {}...".format(len(reviews), filename))
    df_reviews = pd.DataFrame(reviews)
    df_reviews.to_csv(filename, index=False)
    # Check the very path that was written (also valid for absolute paths).
    if exists(filename):
        print("enregistré avec succès !")
    return df_reviews

In [113]:
# Collect reviews for Paris restaurants using the default business count and
# output file, then show the resulting dataframe as the cell output.
tst_reviews = collect_reviews(
    API_KEY,
    search_term="restaurant",
    location="paris",
)
tst_reviews

Querying https://api.yelp.com/v3/businesses/search ...
Collecte des reviews de 50 entreprises...
150 reviews collectées
Querying https://api.yelp.com/v3/businesses/search ...
Collecte des reviews de 50 entreprises...
297 reviews collectées
Querying https://api.yelp.com/v3/businesses/search ...
Collecte des reviews de 50 entreprises...
445 reviews collectées
Querying https://api.yelp.com/v3/businesses/search ...
Collecte des reviews de 50 entreprises...
588 reviews collectées
Enregistrement de 588 reviews dans yelp_reviews.csv...
enregistré avec succès !


Unnamed: 0,review_id,user_id,business_id,stars,text
0,r3bxIJ2ekrp8UPseAj2wjQ,rZ8jfpusbUOn79k8-aM00Q,-0iLH7iQNYtoURciDpJf6w,5,"What an amazing Bistro!! First off, even if yo..."
1,SZtFX-6RF0fLrJjFEyM2XQ,auvnv87BU1RnbIvZ2ag8qg,-0iLH7iQNYtoURciDpJf6w,1,Warning! Tourist trap restaurant. This restau...
2,aGP7nLwEQOUg64jKP8ebsw,1al80cHoRjxr6y2XdH7yyA,-0iLH7iQNYtoURciDpJf6w,5,My favorite meal in Paris! \n\nWe started off ...
3,-to6adg_kucf1SgQckgJXw,SRGQPSaj5qy07YcJNbAQfg,IU9_wVOGBKjfqTTpAXpKcQ,5,This is a small bar/restaurant closed to Cathé...
4,G-YvPO-wUd-NIJDrahcYUg,IZpWaceT5jGXz7DuePadYw,IU9_wVOGBKjfqTTpAXpKcQ,4,what a wonderful lunch! \nthe decor is so fren...
...,...,...,...,...,...
583,mCdNSw81nKEYMVYF4833wA,9YNdR5vC4SFMlzNj7lyMpQ,luWgxwdQDUpURV21UlWslQ,5,Our family spent seven days eating our way thr...
584,TB5J08E6r-IkBt5-Ao1nqA,XJDLaoN1PerKw2woiKeepA,luWgxwdQDUpURV21UlWslQ,4,A must place to go in Paris. We were excited t...
585,YSuuZfpoz6VZi1wPiCWX3A,WsnxtawsIJbNL4wD1oWiAA,luWgxwdQDUpURV21UlWslQ,3,I've read about their segregation practices an...
586,7I50BrBghoG_I4SXnlaO7w,34zBgc8h0YybocASWvRXHQ,fJAK_McN2JC_y1uTsYte0w,5,Walking into this locale and one is inatantly ...


Importons les reviews sauvegardées pour vérifier que tout s'est bien passé

In [126]:
# Reload the reviews CSV from the current directory and preview its first rows.
imported_reviews = pd.read_csv(f"./{FILENAME_REVIEWS}")
imported_reviews.head()

Unnamed: 0,review_id,user_id,business_id,stars,text
0,r3bxIJ2ekrp8UPseAj2wjQ,rZ8jfpusbUOn79k8-aM00Q,-0iLH7iQNYtoURciDpJf6w,5,"What an amazing Bistro!! First off, even if yo..."
1,SZtFX-6RF0fLrJjFEyM2XQ,auvnv87BU1RnbIvZ2ag8qg,-0iLH7iQNYtoURciDpJf6w,1,Warning! Tourist trap restaurant. This restau...
2,aGP7nLwEQOUg64jKP8ebsw,1al80cHoRjxr6y2XdH7yyA,-0iLH7iQNYtoURciDpJf6w,5,My favorite meal in Paris! \n\nWe started off ...
3,-to6adg_kucf1SgQckgJXw,SRGQPSaj5qy07YcJNbAQfg,IU9_wVOGBKjfqTTpAXpKcQ,5,This is a small bar/restaurant closed to Cathé...
4,G-YvPO-wUd-NIJDrahcYUg,IZpWaceT5jGXz7DuePadYw,IU9_wVOGBKjfqTTpAXpKcQ,4,what a wonderful lunch! \nthe decor is so fren...


### Collecter les photos

In [123]:
def collect_photos(
        API_KEY,
        search_term,
        location,
        nb_businesses=200,
        filename=FILENAME_PHOTOS):
    """Collect photo URLs of businesses matching a search and save them to CSV.

    Search results are fetched page by page (SEARCH_LIMIT businesses per
    page) until ``nb_businesses`` businesses have been covered or a page
    comes back empty. The details of each business are then requested and
    the URLs found under their ``photos`` key are gathered, written to
    ``filename`` and returned.

    Args:
        API_KEY (str): Your API key.
        search_term (str): Search terms.
        location (str): Where to search.
        nb_businesses (int): Number of businesses whose photos are collected.
        filename (str): CSV file the photo URLs are saved to.

    Returns:
        pandas.DataFrame: The collected photos, one row per photo
        (business_id, photo_url).
    """
    offset = 0
    photos = []
    while offset < nb_businesses:
        yelp_res = search(API_KEY, search_term, location, offset)
        # Tolerate a response without a 'businesses' key, and keep no more
        # businesses than are still needed to reach nb_businesses.
        businesses = (yelp_res.get("businesses") or [])[:nb_businesses - offset]
        print("Collecte des photos de {} entreprises...".format(len(businesses)))
        if not businesses:
            # Nothing on this page: further pages would only waste requests.
            break
        for business in businesses:
            # Rows are keyed by the id returned by the search, the same id
            # collect_reviews stores, so both CSV files share business_id.
            business_id = business["id"]
            details = get_business(API_KEY, business_id)
            if "photos" not in details:
                continue
            for photo in details["photos"]:
                photos.append({
                    "business_id": business_id,
                    "photo_url": photo,
                })
        print("{} photos collectées".format(len(photos)))
        offset += SEARCH_LIMIT
    print("Enregistrement de {} photos dans {}...".format(len(photos), filename))
    df_photos = pd.DataFrame(photos)
    df_photos.to_csv(filename, index=False)
    # Check the very path that was written (also valid for absolute paths).
    if exists(filename):
        print("enregistré avec succès !")
    return df_photos

In [125]:
# Collect the photo URLs of 200 Paris restaurants into the default CSV file.
collect_photos(
    API_KEY,
    search_term="restaurant",
    location="paris",
    nb_businesses=200,
)

Querying https://api.yelp.com/v3/businesses/search ...
Collecte des photos de 50 entreprises...
Querying https://api.yelp.com/v3/businesses/-0iLH7iQNYtoURciDpJf6w ...
Querying https://api.yelp.com/v3/businesses/IU9_wVOGBKjfqTTpAXpKcQ ...
Querying https://api.yelp.com/v3/businesses/cEjF41ZQB8-SST8cd3EsEw ...
Querying https://api.yelp.com/v3/businesses/pztzge22A_c_BfzLHCmaMw ...
Querying https://api.yelp.com/v3/businesses/ctP4c3mwVO5oOzLI48LtuQ ...
Querying https://api.yelp.com/v3/businesses/70xArgLO0k56YAytyBIHyg ...
Querying https://api.yelp.com/v3/businesses/WHHt_Jb8Tgidn9mW7oDnIg ...
Querying https://api.yelp.com/v3/businesses/SkZWjUwmNgcCJJQvgJT1DQ ...
Querying https://api.yelp.com/v3/businesses/FFz-WusZrBYZexKqhqzCkg ...
Querying https://api.yelp.com/v3/businesses/_qACPRqSQ6Nvd20H8EY3uw ...
Querying https://api.yelp.com/v3/businesses/KggnM_Z4wOa_JExunaaWHg ...
Querying https://api.yelp.com/v3/businesses/DwZsEW-rBrBBLHioGdKoAg ...
Querying https://api.yelp.com/v3/businesses/ZxtU74SJ

Querying https://api.yelp.com/v3/businesses/XqOlA1nGHcwiMmU2QDtdBQ ...
Querying https://api.yelp.com/v3/businesses/26YjCdVvyn5jv4sZYd1Lvw ...
Querying https://api.yelp.com/v3/businesses/prSiklkzkMYOPs7IoONtjA ...
Querying https://api.yelp.com/v3/businesses/3drmuEM-hbUqOXVmZRtF4g ...
Querying https://api.yelp.com/v3/businesses/-sggfLHsfPbgXw5S6a7Jdg ...
Querying https://api.yelp.com/v3/businesses/5jviAXLXm5E78wlpE0pTxA ...
Querying https://api.yelp.com/v3/businesses/AKjVaoytS_XrHBPX7iJeBQ ...
Querying https://api.yelp.com/v3/businesses/Tp528C9RQ-8lpT66KtPJ0A ...
Querying https://api.yelp.com/v3/businesses/BvC_O7iWXp9vVTSBKxmMPA ...
Querying https://api.yelp.com/v3/businesses/eV3XeyDMWwT0E8o8nYodRQ ...
Querying https://api.yelp.com/v3/businesses/Df7eNm_K1CwYy4E18vRzFw ...
Querying https://api.yelp.com/v3/businesses/nB_9kzgII13vfX6LNAdi9g ...
Querying https://api.yelp.com/v3/businesses/TYDSdXOJU-07rZgRFdZYEA ...
Querying https://api.yelp.com/v3/businesses/u0lE065YXe5sKcDiAzJcHg ...
Queryi

Unnamed: 0,business_id,photo_url
0,-0iLH7iQNYtoURciDpJf6w,https://s3-media2.fl.yelpcdn.com/bphoto/czh2IE...
1,-0iLH7iQNYtoURciDpJf6w,https://s3-media2.fl.yelpcdn.com/bphoto/Y0D70M...
2,-0iLH7iQNYtoURciDpJf6w,https://s3-media2.fl.yelpcdn.com/bphoto/TtMQlD...
3,IU9_wVOGBKjfqTTpAXpKcQ,https://s3-media4.fl.yelpcdn.com/bphoto/J9RByC...
4,IU9_wVOGBKjfqTTpAXpKcQ,https://s3-media4.fl.yelpcdn.com/bphoto/Y5fZV7...
...,...,...
577,luWgxwdQDUpURV21UlWslQ,https://s3-media4.fl.yelpcdn.com/bphoto/G5h7Af...
578,luWgxwdQDUpURV21UlWslQ,https://s3-media1.fl.yelpcdn.com/bphoto/-821dW...
579,MqdwabesLORy1qtSiv2NKA,https://s3-media3.fl.yelpcdn.com/bphoto/vkgbLl...
580,MqdwabesLORy1qtSiv2NKA,https://s3-media3.fl.yelpcdn.com/bphoto/IDs9Qp...


Importons les photos sauvegardées:

In [127]:
# Reload the photos CSV from the current directory and preview its first rows.
imported_photos = pd.read_csv(f"./{FILENAME_PHOTOS}")
imported_photos.head()

Unnamed: 0,business_id,photo_url
0,-0iLH7iQNYtoURciDpJf6w,https://s3-media2.fl.yelpcdn.com/bphoto/czh2IE...
1,-0iLH7iQNYtoURciDpJf6w,https://s3-media2.fl.yelpcdn.com/bphoto/Y0D70M...
2,-0iLH7iQNYtoURciDpJf6w,https://s3-media2.fl.yelpcdn.com/bphoto/TtMQlD...
3,IU9_wVOGBKjfqTTpAXpKcQ,https://s3-media4.fl.yelpcdn.com/bphoto/J9RByC...
4,IU9_wVOGBKjfqTTpAXpKcQ,https://s3-media4.fl.yelpcdn.com/bphoto/Y5fZV7...


In [132]:
# Show the first photos next to the id of the business they belong to.
# head(10) simply yields fewer rows when the table is shorter, whereas
# positional iloc[i] indexing over a fixed range would raise IndexError.
for _, photo_row in imported_photos.head(10).iterrows():
    print("business_id : {}".format(photo_row["business_id"]))
    display(Image(url=photo_row["photo_url"]))

business_id : -0iLH7iQNYtoURciDpJf6w


business_id : -0iLH7iQNYtoURciDpJf6w


business_id : -0iLH7iQNYtoURciDpJf6w


business_id : IU9_wVOGBKjfqTTpAXpKcQ


business_id : IU9_wVOGBKjfqTTpAXpKcQ


business_id : IU9_wVOGBKjfqTTpAXpKcQ


business_id : cEjF41ZQB8-SST8cd3EsEw


business_id : cEjF41ZQB8-SST8cd3EsEw


business_id : cEjF41ZQB8-SST8cd3EsEw


business_id : pztzge22A_c_BfzLHCmaMw
