# API Documentation: https://www.mediawiki.org/wiki/API:Action_API

In [99]:
import requests
import pandas as pd

In [100]:
# Endpoint of the English Wikipedia Action API (see the API documentation link at the top).
# The requests in the cells below are all sent to this one URL; only the parameters change.
url = "https://en.wikipedia.org/w/api.php" # This is the only endpoint you will need!

In [101]:
# Query parameters -- adjust these to fit the question you are asking.
# As written: list members of "Category:Good articles", asking for up to
# 500 entries, with the reply formatted as JSON.
params = dict(
    action="query",
    list="categorymembers",
    cmtitle="Category:Good articles",
    cmlimit=500,
    format="json",
)

# Request headers: the User-Agent string tells the server this traffic
# comes from an educational class project.
headers = dict([
    ("User-Agent", "StatisticsEducationBot/1.0 (Statistics Class Project; educational use)"),
])

In [102]:
# Fetch data
# One Session is created here and reused by the later request cells.
session = requests.Session()

# requests applies no timeout by default, so set one explicitly: a stalled
# connection then raises instead of hanging the notebook kernel forever.
response = session.get(url=url, params=params, headers=headers, timeout=30)

print("Status code:", response.status_code)

# Stop right here on a 4xx/5xx reply rather than failing further down with a
# harder-to-read JSON decoding error or KeyError.
response.raise_for_status()

# Parsed JSON body; shown as the cell output for inspection.
data = response.json()
data

Status code: 200


{'batchcomplete': '',
 'continue': {'cmcontinue': 'page|0f4c94042e3244504c2a4004482a4c3e044e46525038011801c4dcbfdcc2dc08|44504196',
  'continue': '-||'},
 'query': {'categorymembers': [{'pageid': 30559581,
    'ns': 0,
    'title': '? Nycticebus linglom'},
   {'pageid': 27146502, 'ns': 0, 'title': '?Oryzomys pliocaenicus'},
   {'pageid': 240931, 'ns': 0, 'title': '.hack (video game series)'},
   {'pageid': 2392479, 'ns': 0, 'title': '.hack//G.U.'},
   {'pageid': 79595782, 'ns': 0, 'title': '*SCAPE building'},
   {'pageid': 30718808, 'ns': 0, 'title': '0-8-4'},
   {'pageid': 223102, 'ns': 0, 'title': 'I Corps (United States)'},
   {'pageid': 9622085, 'ns': 0, 'title': 'Tropical Depression One (1992)'},
   {'pageid': 26406374, 'ns': 0, 'title': 'Tropical Depression One (1993)'},
   {'pageid': 77591639, 'ns': 0, 'title': '1'},
   {'pageid': 43338008,
    'ns': 0,
    'title': '1st Armoured Division (United Kingdom)'},
   {'pageid': 70972352,
    'ns': 0,
    'title': 'List of orders of ba

In [103]:
# Pulling out just the title: the category listing lives under
# data['query']['categorymembers'], and each entry in it has a 'title' key.
# Index 0 selects the first listed article.
first_member = data['query']['categorymembers'][0]
article_title = first_member['title']
print("Article: {}\n".format(article_title))

Article: ? Nycticebus linglom



In [104]:
# Here's how you grab the page's content. Notice that we are using different parameters! This means we need to make a new request.
content_params = {
    "action": "query",
    "titles": article_title,
    "prop": "extracts",  # This gets the text content
    "explaintext": True,  # Plain text instead of HTML
    "format": "json"
}

# NOTE: `response` and `data` are rebound below, replacing the category listing
# fetched earlier. The cell that reads data['query']['categorymembers'] will fail
# if re-run after this one; re-run the category fetch cell first.
# An explicit timeout keeps a stalled connection from hanging the kernel.
response = session.get(url=url, params=content_params, headers=headers, timeout=30)

# Fail fast on an HTTP error instead of hitting a KeyError on 'query' below.
response.raise_for_status()
data = response.json()

# Only one title was requested, so take the first (expected to be the only)
# key of 'pages' and look the page up by it.
pages = data['query']['pages']
page_id = next(iter(pages))

# Fall back to a placeholder when the page entry carries no 'extract' field.
content = pages[page_id].get('extract', 'No content available')

print("Page content:")
print(content)

Page content:
? Nycticebus linglom is a fossil strepsirrhine primate from the Miocene of Thailand. Known only from a single tooth, an upper third molar, it is thought to be related to the living slow lorises (genus Nycticebus), but the material is not sufficient to assign the species to Nycticebus with certainty, and the species name therefore uses open nomenclature. With a width of 1.82 mm, this tooth is very small for a primate. It is triangular in shape, supported by a single root, and shows three main cusps, in addition to various crests. The absence of a fourth cusp, the hypocone, distinguishes it from various other prosimian primates.


== Taxonomy ==
? Nycticebus linglom was described in 1997 by French paleontologists Pierre Mein and Léonard Ginsburg in a report on the fossil mammals of Li Mae Long, a Miocene site in Thailand, about 17–18 million years old. The animal is known from a single tooth, and on the basis of comparisons with other prosimian primates Mein and Ginsburg co