---
title: "Co-Site Tags"
jupyter: python3
code-fold: true 
execute:
  echo: false
  output: asis 
---

In [1]:
from datetime import datetime
import sys
import time
import requests
import html
import locale

# Switch to the system's default locale; locale.strxfrm (used when sorting the
# tag headings) collates according to this setting.
locale.setlocale(locale.LC_ALL,"") 
# A German locale ("de_DE") must be the default locale on whichever system
# runs this notebook; otherwise the umlauts will not be sorted correctly.


# Endpoint of the MediaWiki API that this notebook logs in to and queries.
api_url = "https://climatekg.semanticclimate.net/api.php"

def fetch_mediawiki_data(api_url, params, session):
    """Send a GET request to the MediaWiki API and return the decoded JSON.

    Args:
        api_url: URL of the wiki's ``api.php`` endpoint.
        params: Query parameters handed to ``session.get``.
        session: Object with a requests-style ``get`` method, e.g. the
            session returned by ``login``.

    Returns:
        The parsed JSON body, or ``None`` when a
        ``requests.exceptions.RequestException`` is raised. In that case the
        error is printed rather than propagated.
    """
    try:
        reply = session.get(api_url, params=params)
        # Turn HTTP error status codes into exceptions so they are reported
        # by the handler below.
        reply.raise_for_status()
        return reply.json()
    except requests.exceptions.RequestException as err:
        print(f"Error fetching data from MediaWiki API: {err}")
    return None

def extract_items(data):
    """Convert an ``ask`` API response into a tag -> terms mapping.

    For every entry under ``data["query"]["results"]`` the first value of the
    ``term``, ``vocabulary``, ``tags`` and ``status`` printouts is collected
    (``""`` when a printout is missing or empty), the status label is
    translated to German, and the resulting records are inverted with
    ``flip_tags_dictionary``.

    Args:
        data: Decoded JSON response of the API, or ``None`` when the request
            failed.

    Returns:
        The mapping produced by ``flip_tags_dictionary``; it is empty when
        ``data`` is ``None`` or contains no results.
    """
    txt_fields = ("term", "vocabulary", "tags", "status")
    # German display labels for the status values.
    status_labels = {
        "Draft": "Entwurf",
        "Pending Review": "Review ausstehend",
        "Published": "Veröffentlicht",
        "Unpublished": "Unveröffentlicht",
    }

    # ``data`` is None when fetch_mediawiki_data reported an error.
    results = (data or {}).get("query", {}).get("results", {})
    if not isinstance(results, dict):
        # NOTE(review): the API appears to encode an empty result set as a
        # list rather than an object -- confirm; either way there is nothing
        # to iterate with .items().
        results = {}

    items = []
    for item_id, item_data in results.items():
        printouts = item_data.get("printouts", {})
        item = {"id": item_id}

        for fieldname in txt_fields:
            # Only the first value of a printout is used. A printout that is
            # absent is treated exactly like an empty one, so downstream code
            # always receives "" instead of None.
            values = printouts.get(fieldname) or []
            item[fieldname] = values[0] if values else ""

        status = item["status"]
        if isinstance(status, str):
            # Unknown labels are kept unchanged.
            item["status"] = status_labels.get(status, status)
        items.append(item)

    return flip_tags_dictionary(items)


class Dictlist(dict):
    """Dict whose item assignment appends instead of overwriting.

    ``d[key] = value`` adds ``value`` to a list stored under ``key``; the
    list is created on the first assignment to that key. This is used to
    invert keys and values of a dict, gathering values that share a key.
    """

    def __setitem__(self, key, value):
        try:
            bucket = self[key]
        except KeyError:
            # First value for this key: store the list through the plain
            # dict implementation, bypassing this override.
            bucket = []
            super().__setitem__(key, bucket)
        bucket.append(value)


def flip_tags_dictionary(items):
    """Invert term records from {term: tags} into {tag: terms}.

    Args:
        items: Iterable of dicts with a ``"term"`` entry and a ``"tags"``
            entry holding a comma-separated string of tags.

    Returns:
        A ``Dictlist`` mapping each tag to the list of terms that carry it,
        in input order. Terms whose ``"tags"`` value is empty or ``None`` are
        collected under the empty-string key.
    """
    items_tags = Dictlist()

    for item in items:
        term = f'{item["term"]}'
        # A missing tags value is treated like an empty one instead of
        # raising on the membership/split operations.
        raw_tags = item["tags"] or ""
        # Split on the bare comma and strip each piece, so "a,b" and "a, b"
        # both yield the tags "a" and "b".
        for tag in f"{raw_tags}".split(","):
            items_tags[tag.strip()] = term

    return items_tags

def term_to_link(term):
    """Return an HTML anchor that links *term* to its glossary entry.

    The URL fragment is the lower-cased term with spaces replaced by hyphens
    and the "/*" and "*" markers removed. The visible link text is the term
    with "/*in" and "*in" rewritten to ":in".
    """
    fragment = term.lower().replace(" ", "-").replace("/*", "").replace("*", "")
    label = term.replace("/*in", ":in").replace("*in", ":in")
    # NOTE(review): the term is inserted without HTML escaping.
    return f'<a href="/cosite001.html#{fragment}">{label}</a>'

def generate_html_span(items, vocabulary):
    """Render the tag -> terms mapping as markdown headings with HTML spans.

    Tags are emitted in ``locale.strxfrm`` order. Each tag becomes a level-3
    heading (with "/*in" and "*in" shown as ":in"), followed by one linked
    span per term in the order stored in ``items``.

    Args:
        items: Mapping of tag to list of terms.
        vocabulary: Accepted for the caller's convenience; not used here.

    Returns:
        The concatenated markup as a single string ('' for an empty mapping).
    """
    chunks = []
    for tag in sorted(items, key=locale.strxfrm):
        heading = tag.replace('/*in', ':in').replace('*in', ':in')
        # The whitespace-only lines (4 spaces) are part of the emitted output.
        chunks.append(
            '    \n\n'
            f'### <span class="gloss-term-all gloss-term">{heading}</span> \n\n'
            '    '
        )
        for term in items[tag]:
            link = term_to_link(term)
            # Padding lines are 6, 11 and 12 spaces wide respectively.
            chunks.append(
                '\n      \n'
                f'<span class="gloss-term-all gloss-tag">{link}</span><br>\n'
                '           \n'
                '            '
            )

    return ''.join(chunks)


def login(api_url, username, password):
    """Log in to the MediaWiki API and return the authenticated session.

    Args:
        api_url: URL of the wiki's ``api.php`` endpoint.
        username: Login name sent as ``lgname``.
        password: Password sent as ``lgpassword``.

    Returns:
        The ``requests.Session`` used for the login when the API reports
        ``Success``.

    On any other login result 'Failed to log in' is printed and the process
    exits with status 1. HTTP error statuses raise via ``raise_for_status``
    and are not caught here.
    """
    session = requests.Session()

    # Step 1: fetch a login token.
    token_reply = session.get(
        api_url,
        params={
            'action': 'query',
            'meta': 'tokens',
            'type': 'login',
            'format': 'json',
        },
    )
    token_reply.raise_for_status()
    login_token = token_reply.json()['query']['tokens']['logintoken']

    # Step 2: post the credentials together with that token.
    login_reply = session.post(
        api_url,
        data={
            'action': 'login',
            'lgname': username,
            'lgpassword': password,
            'lgtoken': login_token,
            'format': 'json',
        },
    )
    login_reply.raise_for_status()
    outcome = login_reply.json()['login']['result']

    if outcome != 'Success':
        print('Failed to log in')
        sys.exit(1)
    return session

def get_glossary(vocabulary, session):
    """Query the wiki for a vocabulary's terms and print them grouped by tag.

    Runs an ``ask`` query against the module-level ``api_url`` for all terms
    of ``vocabulary`` whose status is not "Unpublished", inverts the result
    into tag -> terms with ``extract_items`` and prints the markup produced
    by ``generate_html_span`` wrapped in a minimal HTML page.

    Args:
        vocabulary: Vocabulary name inserted into the ``P18`` condition.
        session: Session passed through to ``fetch_mediawiki_data``.

    Returns:
        None. Nothing is printed when the request failed or no tags were
        found.
    """
    params = {
        'action': 'ask',
        'query': f'[[Item:+]] [[has subobject::+]] [[P1::Term]] [[P18::{vocabulary}]] [[P20::!Unpublished]] |?Wikibase |?P3=term |?P20=status |?P22=tags |?P18=vocabulary |sort=P3 |limit=1000',
        'format': 'json'
    }

    response = fetch_mediawiki_data(api_url, params, session)
    if response is None:
        # fetch_mediawiki_data already printed the error; there is nothing to
        # render, and passing None on would fail inside extract_items.
        return

    items = extract_items(response)
    if not items:
        return

    # Local name chosen so the imported ``html`` module is not shadowed.
    page = '<html><head><link rel="stylesheet" href="glossary.css"></head><body>\n\n'
    page += generate_html_span(items, vocabulary)
    page += '\n</body></html>'
    print(page)

In [2]:
# Credentials passed to login() below.
username = 'reader'  # Replace with your username
password = 'readonly'  # Replace with your password

# Log in once, then print the tag index for the "Co-Site" vocabulary.
session = login(api_url, username, password)
get_glossary("Co-Site", session)

<html><head><link rel="stylesheet" href="glossary.css"></head><body>

    

### <span class="gloss-term-all gloss-term">Daten</span> 

    
      
<span class="gloss-term-all gloss-tag"><a href="/cosite001.html#dateninteroperabilität">Dateninteroperabilität</a></span><br>
           
            
      
<span class="gloss-term-all gloss-tag"><a href="/cosite001.html#datenkatalog">Datenkatalog</a></span><br>
           
            
      
<span class="gloss-term-all gloss-tag"><a href="/cosite001.html#fernerkundung">Fernerkundung</a></span><br>
           
            
      
<span class="gloss-term-all gloss-tag"><a href="/cosite001.html#fühlbarer-wärmestrom">Fühlbarer Wärmestrom</a></span><br>
           
            
      
<span class="gloss-term-all gloss-tag"><a href="/cosite001.html#geodaten">Geodaten</a></span><br>
           
            
      
<span class="gloss-term-all gloss-tag"><a href="/cosite001.html#metadaten">Metadaten</a></span><br>
           
            
      
<