In [5]:
import pdb
import requests

## Helper Functions

In [79]:
def query_wiki_api(params, timeout=30):
    """Send one GET request to the English Wikipedia API and decode the reply.

    Args:
        params: Mapping of query-string parameters, passed to ``requests.get``.
        timeout: Seconds to wait on the server before giving up (forwarded to
            ``requests.get``). Without it a stalled connection would block
            the notebook indefinitely.

    Returns:
        The JSON body of the response, decoded by ``Response.json()``.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response has a 4xx/5xx status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    response = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP failures here instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()

def get_pages(resp):
    """Return the page ids of a query response as a list.

    The ids are the keys of ``resp["query"]["pages"]``, in that mapping's
    iteration order.
    """
    pages_by_id = resp["query"]["pages"]
    return [page_id for page_id in pages_by_id.keys()]

def page_dicts(resp):
    """Build one fresh ``{"pageid": ...}`` record for every page in ``resp``.

    The page ids come from ``get_pages(resp)`` and keep its order.
    """
    records = []
    for page_id in get_pages(resp):
        records.append({"pageid": page_id})
    return records

def add_links(resp, pages):
    """Append the link titles found in ``resp`` to the matching page records.

    Args:
        resp: Decoded API response; titles are read from
            ``resp["query"]["pages"][pageid]["links"]``.
        pages: List of page records (dicts with a ``"pageid"`` key). The
            records are updated in place under the ``"links"`` key.

    Returns:
        The same ``pages`` list that was passed in.
    """
    for article in pages:
        try:
            page = resp["query"]["pages"][article["pageid"]]
            titles = [link["title"] for link in page["links"]]
        except (KeyError, TypeError):
            # This response holds no link data for the record; skip it.
            # Only lookup failures are tolerated, so interrupts and
            # unrelated errors are no longer silently swallowed.
            continue

        article.setdefault("links", []).extend(titles)

    return pages

def add_linkshere(resp, pages):
    """Append the backlink titles found in ``resp`` to the matching page records.

    Args:
        resp: Decoded API response; titles are read from
            ``resp["query"]["pages"][pageid]["linkshere"]``.
        pages: List of page records (dicts with a ``"pageid"`` key). The
            records are updated in place under the ``"linkshere"`` key.

    Returns:
        The same ``pages`` list that was passed in.
    """
    for article in pages:
        try:
            page = resp["query"]["pages"][article["pageid"]]
            titles = [link["title"] for link in page["linkshere"]]
        except (KeyError, TypeError):
            # This response holds no backlink data for the record; skip it.
            # Only lookup failures are tolerated, so interrupts and
            # unrelated errors are no longer silently swallowed.
            continue

        article.setdefault("linkshere", []).extend(titles)

    return pages

def add_categories(resp, pages):
    """Append the category titles found in ``resp`` to the matching page records.

    Args:
        resp: Decoded API response; titles are read from
            ``resp["query"]["pages"][pageid]["categories"]``.
        pages: List of page records (dicts with a ``"pageid"`` key). The
            records are updated in place under the ``"categories"`` key.

    Returns:
        The same ``pages`` list that was passed in.
    """
    for article in pages:
        try:
            page = resp["query"]["pages"][article["pageid"]]
            titles = [cat["title"] for cat in page["categories"]]
        except (KeyError, TypeError):
            # This response holds no category data for the record; skip it.
            # Only lookup failures are tolerated, so interrupts and
            # unrelated errors are no longer silently swallowed.
            continue

        article.setdefault("categories", []).extend(titles)

    return pages

def update_continue(resp, params):
    """Carry the response's ``continue`` block over into the request params.

    When ``resp`` has a non-empty ``"continue"`` entry, every key of
    ``params`` whose name contains ``"continue"`` is dropped, the new
    continuation values are merged in, and the same (mutated) ``params``
    dict is returned. Otherwise ``params`` is left alone and ``False`` is
    returned.
    """
    continuation = resp.get("continue")
    if not continuation:
        return False

    # Drop the continuation markers left over from the previous request.
    stale_keys = [name for name in params if "continue" in name]
    for name in stale_keys:
        del params[name]

    params.update(continuation)
    return params
    
        

In [80]:
def wiki_multi_query(articles, params=None, pages=None):
    """Collect links, backlinks and categories for a batch of Wikipedia articles.

    All titles are requested at once. While a response carries a
    ``continue`` block the request is repeated with the continuation values,
    and every batch is merged into one record per page.

    Args:
        articles: Iterable of article titles; joined with ``|`` into the
            ``titles`` request parameter.
        params: Optional request parameters to use instead of the defaults
            below. The dict is copied, so the caller's object is not modified.
        pages: Optional list of page records (dicts with a ``"pageid"`` key)
            to keep filling. When empty or omitted it is built from the
            first response.

    Returns:
        The list of page records, updated by ``add_links``,
        ``add_linkshere`` and ``add_categories``.
    """
    if not params:
        params = {
            "action": "query",
            "format": "json",

            # NOTE: "redirects" is requested here, but no step below copies
            # redirect data into the page records.
            "prop": "redirects|links|linkshere|categories",

            # redirects
            "rdnamespace": 0,
            "rdlimit": "max",

            # links
            "pllimit": "max",
            "plnamespace": 0,

            # linkshere
            "lhlimit": "max",
            "lhnamespace": 0,
            "lhshow": "!redirect",

            # categories
            "cllimit": "max",

            # automatic redirect
            "redirects": 1
        }
    else:
        # Work on a copy: the continuation bookkeeping below rewrites keys.
        params = dict(params)

    params['titles'] = "|".join(articles)

    # Loop rather than recurse so long continuation chains cannot hit the
    # recursion limit.
    while params:
        resp = query_wiki_api(params)

        if not pages:
            pages = page_dicts(resp)

        pages = add_links(resp, pages)
        pages = add_linkshere(resp, pages)
        pages = add_categories(resp, pages)

        # Updated params when the response asks to continue, otherwise False,
        # which ends the loop.
        params = update_continue(resp, params)

    return pages

In [78]:
# NOTE(review): index 3 selects the fourth page record, and records are built
# from the keys of the response's "pages" mapping, which presumably need not
# follow the order of the titles passed in. Confirm that this is the intended
# article before relying on the output.
wiki_multi_query(["Random forest", "Decision tree", "Machine learning", "Sonata"])[3]['linkshere']

['Antonio Vivaldi',
 'Violin Sonata No. 5 (Beethoven)',
 'Classical period (music)',
 'Dmitri Shostakovich',
 'Frédéric Chopin',
 'Henry Purcell',
 'Italy',
 'Music',
 'Poland',
 'The Shockwave Rider',
 'Trilogy',
 'Violin',
 'Wolfgang Amadeus Mozart',
 'William Herschel',
 'Symphony',
 'Cantata',
 'Domenico Scarlatti',
 'Arcangelo Corelli',
 'Robert Schumann',
 'Wojciech Kilar',
 'Jeremy Bentham',
 'Aaron Copland',
 'Henry Mancini',
 'Music theory',
 'Carl Philipp Emanuel Bach',
 'Francis Edward Bache',
 'Artur Schnabel',
 'In Search of Lost Time',
 'Nikolai Rimsky-Korsakov',
 "Viola d'amore",
 'Piccolo',
 'Chamber music',
 'Sonata form',
 'History of music',
 'Cor anglais',
 'Sheet music',
 'Frederick the Great',
 'Scherzo',
 'Index of music articles',
 'Alberti bass',
 'Benedetto Marcello',
 'Binary form',
 'Opus number',
 'Piano Sonata No. 14 (Beethoven)',
 'Frederick Delius',
 'Diabelli Variations',
 'Concerto grosso',
 'Duet',
 'The Four Seasons (Vivaldi)',
 'Suite (music)',
 'Pi