In [2]:
import requests
import json
import datetime 
from datetime import date
import pandas

In [3]:
def buildURL(resource_path, host="httpbin.org", protocol="https",
             extension=None, port=None):
    """Assemble a URL string of the form ``protocol://host[:port]/path[.extension]``.

    Parameters
    ----------
    resource_path : str
        Path of the resource. A leading ``/`` is added when it is missing;
        an empty string therefore becomes the root path ``/``.
    host : str
        Host name placed after ``protocol://``.
    protocol : str
        URL scheme, e.g. ``"https"``.
    extension : str or None
        When given, ``"." + extension`` is appended to the path.
    port : int, str or None
        When given, ``":" + port`` is appended to the host.

    Returns
    -------
    str
        The assembled URL.
    """
    # startswith() rather than indexing [0]: an empty path must not raise IndexError.
    if not resource_path.startswith('/'):
        resource_path = '/' + resource_path

    if extension is not None:
        resource_path += "." + extension

    if port is not None:
        host = host + ":{}".format(port)

    return "{}://{}{}".format(protocol, host, resource_path)

In [4]:
def get_json_object(asin):
    """Fetch the catalog entry for ``asin`` from ``api.audible.com``.

    Requests ``1.0/catalog/products/<asin>`` with the response groups needed
    for the main table and returns the ``"product"`` member of the JSON body.

    Parameters
    ----------
    asin : str
        Product identifier inserted into the request path.

    Returns
    -------
    dict
        The ``"product"`` object of the response. When the request cannot be
        sent or the status code is not 200, a message is printed and an empty
        dict is returned, so downstream lookups fall back to empty values
        instead of failing on an undefined or unusable response.
    """
    host = "api.audible.com"
    main_path = f"1.0/catalog/products/{asin}"
    # Build url for request
    url = buildURL(main_path, host, protocol="https")

    # Build query_params to get all variables in the main table
    query_params = {"response_groups": "contributors,media,product_details,rights,product_attrs,product_extended_attrs,series,sample"}

    try:
        response = requests.get(url, params=query_params)
    except requests.RequestException as error:
        # No response object exists here, so only the error itself can be reported.
        print(f"Failed: {main_path} with error {error}")
        return {}

    # Explicit comparison instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        print(f"Failed: {main_path} with status code {response.status_code}")
        return {}

    return json.loads(response.text)["product"]

In [5]:
def get_today_date():
    """Return today's local date as a ``DD/MM/YYYY`` string."""
    today = date.today()
    return f"{today:%d/%m/%Y}"

In [6]:
def input_manually(dictionary, key, value):
    """Store ``value`` under ``key`` in ``dictionary`` and return that same object.

    The mapping is modified in place; the return value is the very object
    that was passed in, which allows the call to be chained.
    """
    dictionary[key] = value

    return dictionary

In [7]:
def get_pd_uri(json_object):
    """Resolve the product page URL for the product described by ``json_object``.

    Requests ``https://www.audible.com/pd/<asin>`` (following redirects) and
    returns the final URL of that request.

    Parameters
    ----------
    json_object : mapping
        Must provide the key ``"asin"``.

    Returns
    -------
    str
        The resolved URL, or ``""`` when the key is missing or the request fails.
    """
    try:
        asin = json_object["asin"]
        uri = requests.get(f"https://www.audible.com/pd/{asin}", allow_redirects=True).url
    except Exception:
        # Best-effort lookup: any ordinary failure yields an empty value.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # through so a long scraping run can still be stopped.
        uri = ""
    return uri


In [8]:
def get_series_uri(json_object):
    """Resolve the series page URL for the first series listed in ``json_object``.

    Takes ``json_object["series"][0]["url"]``, drops its first three
    characters, appends the rest to ``https://www.audible.com/series`` and
    returns the final URL of that request (redirects are followed).

    Parameters
    ----------
    json_object : mapping
        Expected to provide ``"series"`` as a sequence whose first item has a
        ``"url"`` string.

    Returns
    -------
    str
        The resolved URL, or ``""`` when the data is missing or the request fails.
    """
    try:
        # NOTE(review): the [3:] slice presumably strips a leading "/pd" — confirm
        # against a real payload.
        series_endpoint = json_object["series"][0]["url"][3:]
        series_uri = requests.get(
            f"https://www.audible.com/series{series_endpoint}",
            allow_redirects=True,
        ).url
    except Exception:
        # Best-effort lookup; `Exception` keeps KeyboardInterrupt/SystemExit
        # propagating, which a bare except would swallow.
        series_uri = ""
    return series_uri

In [9]:
def get_publisher_uri(json_object):
    """Resolve the search-page URL that lists titles of the product's publisher.

    Requests ``https://www.audible.com/search`` with the query parameter
    ``searchProvider=<publisher_name>`` (following redirects) and returns the
    final URL of that request.

    Parameters
    ----------
    json_object : mapping
        Must provide the key ``"publisher_name"``.

    Returns
    -------
    str
        The resolved URL, or ``""`` when the key is missing or the request fails.
    """
    try:
        publisher_name = json_object["publisher_name"]
        # Pass the name through `params` so it is URL-encoded: interpolating it
        # into the query string truncates names containing "&" or "#".
        publisher_uri = requests.get(
            "https://www.audible.com/search",
            params={"searchProvider": publisher_name},
            allow_redirects=True,
        ).url
    except Exception:
        # Best-effort lookup; `Exception` keeps KeyboardInterrupt/SystemExit
        # propagating, which a bare except would swallow.
        publisher_uri = ""
    return publisher_uri

In [10]:
def get_author_uri(json_object):
    """Resolve the author page URL for the first author listed in ``json_object``.

    Requests ``https://www.audible.com/author/<author asin>`` (following
    redirects) and returns the final URL of that request.

    Parameters
    ----------
    json_object : mapping
        Expected to provide ``"authors"`` as a sequence whose first item has
        an ``"asin"``.

    Returns
    -------
    str
        The resolved URL, or ``""`` when the data is missing or the request fails.
    """
    try:
        # The author table in this notebook reads the payload under "authors";
        # the singular key is kept only as a fallback for older inputs.
        contributors = json_object.get("authors") or json_object["author"]
        author_asin = contributors[0]["asin"]
        author_uri = requests.get(
            f"https://www.audible.com/author/{author_asin}",
            allow_redirects=True,
        ).url
    except Exception:
        # Best-effort lookup; `Exception` keeps KeyboardInterrupt/SystemExit
        # propagating, which a bare except would swallow.
        author_uri = ""
    return author_uri

In [13]:
def build_dict(my_dict, key, response, response_key):
    """Copy one (possibly nested) value out of ``response`` into ``my_dict``.

    Parameters
    ----------
    my_dict : dict
        Destination mapping; modified in place.
    key : hashable
        Key under which the value is stored in ``my_dict``.
    response : object
        Nested structure of subscriptable containers to read from.
    response_key : sequence
        Successive keys/indices applied to ``response``; e.g.
        ``["series", 0, "title"]`` reads ``response["series"][0]["title"]``.
        An empty sequence stores ``response`` itself.

    Returns
    -------
    dict
        ``my_dict``, with ``my_dict[key]`` set to the value found, or to ``""``
        when any step of the path is missing or not subscriptable.
    """
    path = tuple(response_key)
    value = response
    try:
        # One loop covers the single-key and the nested case alike.
        for step in path:
            value = value[step]
    except (LookupError, TypeError):
        # Missing key/index or a non-subscriptable intermediate value.
        # A bare except here would also swallow KeyboardInterrupt.
        value = ""
    my_dict[key] = value
    return my_dict
## Functions to scrape data in the Main table
def main_table(asin):
    """Collect the main-table, author and narrator records for one product.

    Fetches the product payload with ``get_json_object`` and copies selected
    fields out of it with ``build_dict`` (missing fields become ``""``); the
    ``*_uri`` fields come from the ``get_*_uri`` helpers.

    Parameters
    ----------
    asin : str
        Product identifier passed to ``get_json_object``.

    Returns
    -------
    list of dict
        ``[main, author, narrator]`` where ``main`` holds the product fields
        plus ``uri``, ``series_name``, ``series_uri`` and ``publisher_uri``;
        ``author`` holds ``author``, ``asin`` and ``uri`` of the first author;
        ``narrator`` holds ``name`` of the first narrator.
    """
    json_text = get_json_object(asin)
    D_main = {}
    D_author = {}
    D_narrator = {}

    # Top-level fields that can be copied directly with build_dict
    variables = ["asin","publisher_name","sku","sku_lite","title",'subtitle',"language","content_delivery_type","content_type","format_type","has_children","is_adult_product",
    "is_listenable","is_purchasability_suppressed","is_vvab","merchandising_summary","release_date","runtime_length_min","copyright","extended_product_description","is_pdf_url_available","merchandising_description","platinum_keywords",
    "product_site_launch_date","read_along_support","is_world_rights","issue_date","publisher_summary"]
    for var in variables:
        build_dict(D_main, var, json_text, [var])

    D_main["uri"] = get_pd_uri(json_text)
    build_dict(D_main, "series_name", json_text, ["series", 0, "title"])
    D_main["series_uri"] = get_series_uri(json_text)
    D_main["publisher_uri"] = get_publisher_uri(json_text)

    ## author dictionary
    build_dict(D_author, "author", json_text, ["authors", 0, "name"])
    build_dict(D_author, "asin", json_text, ["authors", 0, "asin"])
    D_author["uri"] = get_author_uri(json_text)

    ## narrator dictionary
    # NOTE(review): authors are read from the plural key "authors"; narrators
    # are presumably exposed the same way ("narrators") — confirm against a
    # real payload. The original singular key is kept as a fallback.
    build_dict(D_narrator, "name", json_text, ["narrators", 0, "name"])
    if D_narrator["name"] == "":
        build_dict(D_narrator, "name", json_text, ["narrator", 0, "name"])

    return [D_main, D_author, D_narrator]


----

# Authors & Narrators

In [20]:
def get_rating_object(asin):
    """Fetch rating and price data for ``asin`` from ``api.audible.com``.

    Requests ``1.0/catalog/products/<asin>`` with the response groups
    ``rating,price`` and returns the ``"product"`` member of the JSON body.

    Parameters
    ----------
    asin : str
        Product identifier inserted into the request path.

    Returns
    -------
    dict
        The ``"product"`` object of the response. When the request cannot be
        sent or the status code is not 200, a message is printed and an empty
        dict is returned, so downstream lookups fall back to empty values
        instead of failing on an undefined or unusable response.
    """
    host = "api.audible.com"
    main_path = f"1.0/catalog/products/{asin}"
    # Build url for request
    url = buildURL(main_path, host, protocol="https")

    # Only the rating and price response groups are needed here
    query_params = {"response_groups": "rating,price"}

    try:
        response = requests.get(url, params=query_params)
    except requests.RequestException as error:
        # No response object exists here, so only the error itself can be reported.
        print(f"Failed: {main_path} with error {error}")
        return {}

    # Explicit comparison instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        print(f"Failed: {main_path} with status code {response.status_code}")
        return {}

    return json.loads(response.text)["product"]

In [23]:
def price_reviews(asin):
    """Build a flat record of price and rating figures for one product.

    The payload comes from ``get_rating_object``; every field is copied with
    ``build_dict``, so a field missing from the payload is stored as ``""``.

    Returns a dict with three top-level entries (list price base, its currency
    code, number of reviews) followed by, for each rating metric, one entry
    per distribution prefixed ``overall_``, ``performance_`` and ``story_``.
    """
    payload = get_rating_object(asin)
    record = {}

    # NOTE(review): "date_scraped_price" stores the base list price itself;
    # the key name is kept as-is because consumers may depend on it.
    top_level_fields = (
        ("date_scraped_price", ["price", "list_price", "base"]),
        ("price_unit", ["price", "list_price", "currency_code"]),
        ("num_reviews", ["rating", "num_reviews"]),
    )
    for column, path in top_level_fields:
        build_dict(record, column, payload, path)

    distributions = (
        ('overall_', "overall_distribution"),
        ('performance_', "performance_distribution"),
        ('story_', "story_distribution"),
    )
    metrics = (
        "average_rating", "display_average_rating", 'display_stars',
        'num_five_star_ratings', 'num_four_star_ratings',
        'num_three_star_ratings', 'num_two_star_ratings',
        'num_one_star_ratings', 'num_ratings',
    )
    # Metric-major order: all three distributions of one metric are written
    # before moving on to the next metric.
    for metric in metrics:
        for prefix, section in distributions:
            build_dict(record, prefix + metric, payload, ["rating", section, metric])

    return record

In [25]:
## Count number of reviews
def review(asin, p):
    """Fetch one page of customer reviews for a product.

    Requests ``1.0/catalog/products/<asin>/reviews`` on ``api.audible.com``
    asking for 50 results of page ``p`` and returns the
    ``"customer_reviews"`` member of the JSON body.
    """
    endpoint = "1.0/catalog/products/{0}/reviews".format(asin)
    url = buildURL(endpoint, "api.audible.com", protocol="https")

    # Page size is fixed at 50; `p` selects which page to fetch.
    paging = {"num_results": 50, 'page': p}
    rev = requests.get(url, params=paging)

    body = json.loads(rev.text)
    return body["customer_reviews"]

def count_review(asin):
    """Count the review pages of a product.

    Pages are fetched with ``review`` starting at page 1 until one comes back
    with a length other than 50 (the page size ``review`` asks for).

    Returns
    -------
    tuple
        ``(page_number, page_length)``: the number of the first page that is
        not full and how many reviews it holds (0 when the total is an exact
        multiple of 50).
    """
    page = 1
    page_length = len(review(asin, page))
    while page_length == 50:
        page += 1
        page_length = len(review(asin, page))
    # The length measured by the loop is reused, so the last page is not
    # requested a second time.
    return page, page_length
    
def all_review(asin):
    """Collect every customer review of a product into one list.

    Pages are fetched with ``review`` starting at page 1 and appended in
    order; fetching stops after the first page whose length is not 50 (the
    page size ``review`` asks for).

    Returns
    -------
    list
        All review entries, in page order.
    """
    reviews = []
    page = 1
    while True:
        batch = review(asin, page)
        reviews.extend(batch)
        # A page that is not full is the last one. Each page is requested
        # exactly once, with no separate counting pass beforehand.
        if len(batch) != 50:
            break
        page += 1
    return reviews