# Exploring facets

First let's import a few things that we'll need. We're going to use [Plotly](https://plot.ly/python/) to create charts.

In [None]:
# Standard library
import os

# Third-party: data handling, charting, and HTTP requests
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py
import requests

# Make sure data directory exists (exist_ok avoids an error when re-running the cell)
os.makedirs('../../data/Trove', exist_ok=True)

# Set up Plotly so that charts can be rendered inside the notebook
py.init_notebook_mode()

Insert your API key between the quotes.

In [None]:
# Your API key goes between the quotes; it is sent with the request as the 'key' parameter
api_key = ''
key_message = 'Your API key is: {}'.format(api_key)
print(key_message)

In [None]:
# URL that the search request is sent to; the query parameters are added to it by requests
api_search_url = 'https://api.trove.nla.gov.au/result'

Set up our query parameters. We want everything, so we set the `q` parameter to be a single space.

In [None]:
# Query parameters sent with the search request
params = dict(
    q=' ',  # A single space, so that the search matches everything
    facet='format',  # The facet whose terms and counts we want back
    zone='book',
    key=api_key,
    encoding='json',
    n=1,  # presumably the number of records to return (only the facets are used) - confirm in the API docs
)

In [None]:
# Send the query; the timeout stops the cell from hanging indefinitely on a stalled connection
response = requests.get(api_search_url, params=params, timeout=60)
print(response.url) # This shows us the url that's sent to the API
# Stop here with a clear HTTP error if the API rejected the request,
# rather than failing later while decoding or reading the response
response.raise_for_status()
data = response.json()
# Uncomment to inspect the raw response
# print(data)

In [None]:
from operator import itemgetter

def facet_totals(results=None):
    '''
    Collect the count for every facet term (and sub-term) in a search result.

    Terms are processed alphabetically by their 'search' value. Where a term
    has sub-terms, each sub-term is listed (also alphabetically) before its
    parent, and its count is subtracted from the parent's count, so the
    parent's total only covers what its sub-terms do not account for.

    Parameters:
        results: the decoded API response to read. Defaults to the
            module-level `data` created by the request cell.

    Returns a pandas DataFrame with the columns 'facet' and 'total'.
    Both columns are present even when the response contains no terms.
    '''
    if results is None:
        results = data
    terms = results['response']['zone'][0]['facets']['facet']['term']
    rows = []
    # Sort alphabetically by facet name
    for term in sorted(terms, key=itemgetter('search')):
        remaining = int(term['count'])
        # There be sub-terms! (an absent 'term' key simply means none)
        for subterm in sorted(term.get('term', []), key=itemgetter('search')):
            sub_total = int(subterm['count'])
            rows.append({'facet': subterm['search'], 'total': sub_total})
            # Subtract the subterm count from the term count
            remaining -= sub_total
        rows.append({'facet': term['search'], 'total': remaining})
    # Naming the columns keeps them available to later cells even if rows is empty
    return pd.DataFrame(rows, columns=['facet', 'total'])

# Run the function and keep the resulting DataFrame. Note that this rebinds the
# name `facet_totals` from the function to its result, so the definition has to be
# executed again before the function can be called a second time.
facet_totals = facet_totals()
# Leaving the name as the last expression displays the table as the cell's output
facet_totals

Now we can create a bar chart using Plotly. The `x` values will be the facet names, and the `y` values will be the totals.

In [None]:
# Order the facets by total (highest to lowest) and keep only the first twenty rows.
# To chart every facet, or to keep the alphabetical order, remove the
# `.head(20)` and/or the `.sort_values(...)` call.
top_facets = facet_totals.sort_values(by="total", ascending=False).head(20)

In [None]:
# Create a bar chart: one bar per facet, with its height given by the facet's total
bar_trace = go.Bar(x=top_facets['facet'], y=top_facets['total'])
plot_data = [bar_trace]
py.iplot(plot_data, filename='facets-bar')

Or if you'd prefer a pie chart...

In [None]:
# Create a pie chart: one slice per facet, sized by the facet's total
pie_trace = go.Pie(labels=top_facets['facet'], values=top_facets['total'])
plot_data = [pie_trace]
py.iplot(plot_data, filename='facets-pie')

In [None]:
# Save the full table of facet totals (not just the top twenty) as a CSV file
# named after the facet that was queried; the row index is left out.
csv_path = '../../data/Trove/facet-{}.csv'.format(params['facet'])
facet_totals.to_csv(csv_path, index=False)