In [1]:
from datetime import datetime
import sys
import time
import requests
import html
import locale

locale.setlocale(locale.LC_ALL,"") 
# Adopt the default locale of the system running this notebook.
# That default must be a German locale ("de_DE"); otherwise the locale-aware
# sorting of glossary terms does not order umlauts correctly.


# MediaWiki API endpoint that the functions below send their requests to.
api_url = "https://climatekg.semanticclimate.net/api.php"

def fetch_mediawiki_data(api_url, params, session, timeout=60):
    """Send a GET request to a MediaWiki API endpoint and return the decoded JSON.

    Args:
        api_url: URL of the API endpoint.
        params: Mapping of query parameters sent with the request.
        session: Session object whose ``get(url, params=..., timeout=...)``
            performs the request (a ``requests.Session`` in this notebook).
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive server cannot block the notebook indefinitely.

    Returns:
        The parsed JSON body, or ``None`` when the request failed or the body
        was not valid JSON (the error is printed in both cases).
    """
    try:
        response = session.get(api_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except ValueError as e:
        # The body could not be decoded as JSON (e.g. an HTML error page).
        print(f"Error fetching data from MediaWiki API: {e}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from MediaWiki API: {e}")
        return None

def _entity_text(entity):
    """Return the display text of one printout value (dict with 'fulltext' or plain value)."""
    if isinstance(entity, dict):
        return entity.get('fulltext', '')
    return str(entity)


def _entity_link(text):
    """Build a Markdown link ``[label](#anchor)`` for one related term."""
    # Gender markers '/*in' and '*in' are shown as ':in' in the label ...
    label = text.replace('/*in', ':in').replace('*in', ':in')
    # ... and removed from the anchor, which is lower-cased and hyphenated.
    anchor = text.lower().replace(' ', '-').replace('/*', '').replace('*', '')
    return f"[{label}](#{anchor})"


def _first_multilang_text(values):
    """Return the first text of a multilingual printout, or '' if there is none."""
    if not values:
        return ""
    first = values[0]
    if isinstance(first, str):
        return first
    if not isinstance(first, dict):
        return ""
    texts = (first.get("Text") or {}).get("item") or []
    if texts and texts[0] is not None:
        return texts[0]
    return ""


def extract_items(data):
    """Flatten an ``ask`` API response into a list of plain item dictionaries.

    Args:
        data: Decoded JSON response; the items are read from
            ``data["query"]["results"]``. ``None`` (a failed request) is
            accepted and treated as "no results".

    Returns:
        A list with one dict per result. Each dict has the key ``id``, the
        text fields ``term``, ``description_plain``, ``vocabulary``,
        ``acronym`` and ``status``, the entity fields ``subCategoryOf``,
        ``synonyms``, ``tags`` and ``similarTo`` (comma-separated text) with a
        matching ``<field>_links`` entry (comma-separated Markdown links), and
        ``description``. Missing values are represented by ``""``. A known
        English status value is replaced by its German translation.
    """
    multilang_fields = ["description"]
    entity_fields = ["subCategoryOf", "synonyms", "tags", "similarTo"]
    txt_fields = ["term", "description_plain", "vocabulary", "acronym", "status"]
    status_translation = {
        "Draft": "Entwurf",
        "Pending Review": "Review ausstehend",
        "Published": "Veröffentlicht",
        "Unpublished": "Unveröffentlicht",
    }

    results = (data or {}).get("query", {}).get("results", {})
    # Guard against a non-mapping value (e.g. an empty list) so that the
    # iteration below cannot fail on ``.items()``.
    if not isinstance(results, dict):
        return []

    items = []
    for item_id, item_data in results.items():
        printouts = item_data.get("printouts") or {}
        item = {"id": item_id}

        # Plain text fields: keep the first value, or "" when there is none.
        for fieldname in txt_fields:
            values = printouts.get(fieldname) or []
            first = values[0] if values else None
            item[fieldname] = "" if first is None else first

        # Entity fields: one comma-separated text plus one list of links.
        for fieldname in entity_fields:
            texts = [_entity_text(entity) for entity in (printouts.get(fieldname) or [])]
            item[fieldname] = ", ".join(texts)
            item[f"{fieldname}_links"] = ", ".join(_entity_link(text) for text in texts)

        for fieldname in multilang_fields:
            item[fieldname] = _first_multilang_text(printouts.get(fieldname))

        status = item["status"]
        if isinstance(status, str):
            # Unknown status values are kept unchanged.
            item["status"] = status_translation.get(status, status)

        items.append(item)

    return items

#creates link to tag overview
def tag_to_link(tag):
    """Turn a comma-separated tag string into HTML links to the tag overview page.

    Each tag (the pieces of ``tag`` separated by ", ") becomes an ``<a>``
    element pointing at ``/cosite002.html#<anchor>``; the resulting links are
    joined with ", " again.
    """
    def _single_link(name):
        # Anchor: lower-case, spaces as hyphens, gender markers removed.
        anchor = name.lower().replace(" ", "-").replace("/*", "").replace("*", "")
        # Label: gender markers are displayed as ':in'.
        label = name.replace("/*in", ":in").replace("*in", ":in")
        return f'<a href="/cosite002.html#{anchor}">{label}</a>'

    return ", ".join(_single_link(name) for name in tag.split(", "))


def generate_html_span(items, vocabulary):
    """Render glossary items as Markdown with embedded HTML spans and raw LaTeX blocks.

    The items are sorted by their ``term`` using the current locale's
    collation. Every item produces a level-3 heading with the term, the
    definition, and - only when the corresponding value is non-empty - the
    acronym, plain description, status, related terms, parent terms, synonyms
    and tags, followed by a horizontal rule.

    Args:
        items: List of item dicts as produced by ``extract_items``.
        vocabulary: Not used by the rendering; kept so existing calls keep
            working.

    Returns:
        The rendered text as a single string.
    """
    def _inclusive(text):
        # Gender markers '/*in' and '*in' are displayed as ':in'.
        return text.replace('/*in', ':in').replace('*in', ':in')

    # A missing or empty term sorts as the empty string instead of raising.
    items = sorted(items, key=lambda t: locale.strxfrm(t.get('term') or ''))
    markup = '''

---

    '''
    for item in items:
        term = _inclusive(item.get("term") or "")
        # A missing description is rendered as an empty definition.
        description = _inclusive(item.get("description") or "")

        # Backslashes of the LaTeX commands are written doubled so that the
        # string literals contain no invalid escape sequences.
        markup += f'''

### <span style="display: inline-flex" class="gloss-term-all gloss-term">{term}</span>

```{{=latex}}
\\nopagebreak
\\penalty 100000  
```
        '''

        if item.get("acronym"):
            markup += f'''
**<span style="display: inline-flex" class="gloss-term-all gloss-acronym">({item["acronym"]})</span>**

```{{=latex}}
\\nopagebreak
\\penalty 100000 
```
            '''

        markup += f'''

<span class="gloss-term-all gloss-definition">{description}</span>
        
```{{=latex}}
\\nopagebreak
\\penalty 100000
```
        '''

        if item.get("description_plain"):
            markup += f'''
**<span class="gloss-term-all gloss-cl-definition-label">Einfache Beschreibung: </span> **
<span class="gloss-term-all gloss-cl-definition">{_inclusive(item["description_plain"])}</span>
<br>

```{{=latex}}
\\nopagebreak
\\penalty 100000  
```
            '''

        if item.get("status"):
            markup += f'''
::: {{.content-visible when-format="html"}}
**<span class="gloss-term-all gloss-status-label">Status: </span> **
<span class="gloss-term-all gloss-status">{item["status"]}</span>
<br>

:::
            '''

        if item.get("similarTo"):
            markup += f'''
**<span class="gloss-term-all gloss-similar-tag gloss-similar-label">Verwandt: </span> **
<span class="gloss-term-all gloss-similarto">{item["similarTo_links"]}</span>
<br>

```{{=latex}}
\\nopagebreak
\\penalty 100000 
```
            '''

        if item.get("subCategoryOf"):
            markup += f'''
**<span class="gloss-term-all gloss-subclass-tag gloss-subclass-label">Unterbegriff von: </span> **
<span class="gloss-term-all gloss-subCategoryOf">{item["subCategoryOf_links"]}</span>
<br>

```{{=latex}}
\\nopagebreak
\\penalty 100000  
```
            '''

        if item.get("synonyms"):
            markup += f'''
**<span class="gloss-term-all gloss-synonyme-tag gloss-synonyme-label">Synonyme: </span> **
<span class="gloss-term-all gloss-synonym">{item["synonyms_links"]}</span>
<br>

```{{=latex}}
\\nopagebreak
\\penalty 100000  
```
            '''

        if item.get("tags"):
            tag = tag_to_link(item.get("tags"))
            markup += f'''

::: {{.content-visible when-format="html"}}
<span class="tag-label">Tags:</span>
<span class="gloss-term-all gloss-tag">{tag}</span>
:::

::: {{.content-visible when-format="pdf"}}
*<span class="gloss-term-all gloss-tag">{tag}</span>*
:::

            '''
        markup += '''
---

```{=latex}
\\penalty -100  % Allow a break more easily here
```

        '''

    return markup


def login(api_url, username, password, timeout=60):
    """Log in to the wiki API and return the authenticated session.

    Args:
        api_url: URL of the API endpoint.
        username: Login name sent as ``lgname``.
        password: Password sent as ``lgpassword``.
        timeout: Seconds to wait for each of the two requests, so that an
            unresponsive server cannot block the notebook indefinitely.

    Returns:
        The ``requests.Session`` that performed the login.

    Raises:
        requests.exceptions.HTTPError: If either request returns an HTTP
            error status.
        SystemExit: If the API does not report the login as successful
            (after printing a message).
    """
    session = requests.Session()

    # Step 1: GET request to fetch login token
    login_token_params = {
        'action': 'query',
        'meta': 'tokens',
        'type': 'login',
        'format': 'json'
    }
    response = session.get(api_url, params=login_token_params, timeout=timeout)
    response.raise_for_status()
    login_token = response.json()['query']['tokens']['logintoken']

    # Step 2: POST request to log in
    login_params = {
        'action': 'login',
        'lgname': username,
        'lgpassword': password,
        'lgtoken': login_token,
        'format': 'json'
    }
    response = session.post(api_url, data=login_params, timeout=timeout)
    response.raise_for_status()
    login_result = response.json()

    # A payload without a 'login' entry counts as a failed login instead of
    # raising KeyError.
    outcome = (login_result.get('login') or {}).get('result')
    if outcome == 'Success':
        return session

    print('Failed to log in')
    sys.exit(1)

def get_glossary(vocabulary, session):
    """Query the glossary terms of one vocabulary and print them as a document.

    Runs an ``ask`` query against the module-level ``api_url`` for items whose
    P18 value is ``vocabulary`` (the query also carries the condition
    ``[[P20::!Unpublished]]``), converts the response with ``extract_items``
    and prints the text built by ``generate_html_span`` wrapped in a minimal
    HTML skeleton. Nothing is printed when the request fails or no items are
    found.

    Args:
        vocabulary: Vocabulary name inserted into the query.
        session: Session used for the API request.
    """
    params = {
        'action': 'ask',
        'query': f'[[Item:+]] [[has subobject::+]] [[P1::Term]] [[P18::{vocabulary}]] [[P20::!Unpublished]] |?Wikibase description=description |?P26=description_plain |?P3=term |?P10.P3=subCategoryOf |?P20=status |?P21=acronym |?P22=tags |?P23.P3=similarTo |?P18=vocabulary |?P24.P3=synonyms |sort=P3 |limit=1000',
        'format': 'json'
    }

    response = fetch_mediawiki_data(api_url, params, session)
    if response is None:
        # The fetch helper has already reported the error; there is nothing to render.
        return

    items = extract_items(response)

    if items:
        document = '<html><head><link rel="stylesheet" href="glossary.css"></head><body>\n\n'
        document += generate_html_span(items, vocabulary)
        document += '\n</body></html>'
        print(document)

In [2]:
# Credentials passed to login() below.
# NOTE(review): these are hardcoded in the notebook - confirm this account is
# meant to be public; otherwise read the values from the environment instead.
username = 'reader'  # Replace with your username
password = 'readonly'  # Replace with your password

# Log in, then print the rendered glossary for the "Co-Site" vocabulary.
session = login(api_url, username, password)
get_glossary("Co-Site", session)

<html><head><link rel="stylesheet" href="glossary.css"></head><body>



---

    

### <span style="display: inline-flex" class="gloss-term-all gloss-term">360-Grad-Video</span>

```{=latex}
\nopagebreak
\penalty 100000  
```
        

<span class="gloss-term-all gloss-definition">Video, das in alle Richtungen gleichzeitig aufgenommen wird, sodass sich die Zuschauer:innen in jede Richtung umsehen können. Diese Videos bieten ein immersives Erlebnis, bei dem Betrachter:innen das Gefühl haben, mitten im Geschehen zu sein, wenn sie das Video auf einem Bildschirm oder mit einer VR-Brillen betrachten.</span>
        
```{=latex}
\nopagebreak
\penalty 100000
```
        
::: {.content-visible when-format="html"}
**<span class="gloss-term-all gloss-status-label">Status: </span> **
<span class="gloss-term-all gloss-status">Entwurf</span>
<br>

:::
            
**<span class="gloss-term-all gloss-similar-tag gloss-similar-label">Verwandt: </span> **
<span class="gloss-term-all gloss-similarto">[