In [5]:
from datetime import datetime
import sys
import time
from SPARQLWrapper import SPARQLWrapper, JSON
import requests
import html


api_url = "https://climatekg.semanticclimate.net/api.php"

def fetch_mediawiki_data(api_url, params):
    try:
        response = requests.get(api_url, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
        return data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from MediaWiki API: {e}")
        return None

def extract_items(data):
    items = []
    results = data.get("query", {}).get("results", {})
    
    for item_id, item_data in results.items():
        print(item_data)
        subcats = item_data["printouts"].get("subcategoryOf", [None])
        subcat = ""
        if len(subcats) > 0: 
            subcat = subcats[0] 
        definition = ""
        if item_data["printouts"].get("definition", [{}]):
            definition = item_data["printouts"].get("definition", [{}])[0].get("Text", {}).get("item", [None])[0]
        item = {
            "id": item_id,
            "term": item_data["printouts"].get("term", [None])[0],
            "definition": definition,
            "subCategoryOf": subcat
        }
        items.append(item)
    
    return items


def generate_markdown_table(items):
    markdown = '\n| ID | Begriff | Definition | Unterkategorie von |\n'
    markdown += '|---|---|---|---|\n'

    for item in items: 
        markdown += f'|{item["id"]}'
        markdown += f'|{item["term"]}'
        markdown += f'|{item["definition"]}'
        markdown += f'|{item["subCategoryOf"]}'
        markdown += '|\n'
    
    return markdown


def get_glossary():
    params = {
        'action': 'ask',
        'query': '[[Item:+]] [[has subobject::+]] [[P1::Term]] |?Wikibase description=definition |?P3=term |?P10.P3=subcategoryOf',
        'format': 'json'
    }

    response = fetch_mediawiki_data(api_url, params)
    items = extract_items(response)
    if items:
        # Generate the markdown for the table
        markdown_table = generate_markdown_table(items)
        print(markdown_table)





In [6]:
get_glossary()

{'printouts': {'definition': [{'Text': {'label': 'Text', 'key': '_TEXT', 'typeid': '_txt', 'item': ['Each finding is grounded in an evaluation of underlying evidence and agreement. The IPCC calibrated language uses five qualifiers to express a level of confidence (very low, low, medium, high and very high )']}, 'Sprachcode': {'label': 'Sprachcode', 'key': '_LCODE', 'typeid': '__lcode', 'item': ['de']}}, {'Text': {'label': 'Text', 'key': '_TEXT', 'typeid': '_txt', 'item': ['2Each finding is grounded in an evaluation of underlying evidence and agreement. The IPCC calibrated language uses five qualifiers to express a level of confidence (very low, low, medium, high and very high )']}, 'Sprachcode': {'label': 'Sprachcode', 'key': '_LCODE', 'typeid': '__lcode', 'item': ['en']}}], 'term': ['low confidence', 'term_low confidence', 'term_low confidenceee', 'low confidence2'], 'subcategoryOf': ['qualifiers']}, 'fulltext': 'Item:Q10', 'fullurl': 'https://climatekg.semanticclimate.net/index.php?t