In [2]:
import sys
from pathlib import Path

# Put the directory one level above the current working directory at the front
# of the import search path, unless that directory is already listed.
parent = str(Path().resolve().parent)
if sys.path.count(parent) == 0:
    sys.path.insert(0, parent)

In [3]:
import requests
import pandas as pd
import re
import numpy as np
import time
import json

In [4]:
# Load the Wikipedia category titles to crawl, one title per line.
# The encoding is pinned so Danish letters (æ, ø, å) in category names decode
# the same way on every platform instead of following the locale default.
# NOTE(review): assumes the file is saved as UTF-8 — confirm.
# Blank lines and surrounding whitespace are dropped so they never turn into
# empty or malformed category queries.
with open("../categories/categories_musicians.txt", "r", encoding="utf-8") as file:
    categories_musicians = [
        line.strip() for line in file.read().splitlines() if line.strip()
    ]

In [5]:


# MediaWiki Action API endpoint of the Danish-language Wikipedia (da.wikipedia.org).
WIKI_API_URL = "https://da.wikipedia.org/w/api.php"

def get_category_members(category, limit=10000, cmcontinue=None, timeout=30):
    """Fetch one batch of pages that belong to a Wikipedia category.

    Args:
        category: Full category title including its namespace prefix,
            e.g. "Kategori:Sangere fra Danmark".
        limit: Value sent as ``cmlimit``. The API applies its own per-request
            cap (see the MediaWiki categorymembers documentation), so a batch
            may contain fewer entries than requested.
        cmcontinue: Continuation token taken from the ``continue.cmcontinue``
            field of a previous response; leave as None for the first batch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response as a dict. Category members, when present,
        are under ``["query"]["categorymembers"]``.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        requests.RequestException: For connection problems or timeouts.
    """
    params = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": category,
        "cmlimit": limit,
        "cmtype": "page",  # only articles, no sub-categories or files
        "format": "json",
    }

    if cmcontinue:
        params["cmcontinue"] = cmcontinue

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(WIKI_API_URL, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()

def get_music_articles(categories):
    """Collect the member pages of every given Wikipedia category.

    Each category is fetched batch by batch: as long as a response carries a
    ``continue.cmcontinue`` token, the next batch is requested, so categories
    larger than a single API batch are returned in full.

    Args:
        categories: Iterable of full category titles (with namespace prefix).

    Returns:
        A list of the ``categorymembers`` entries of all categories, in the
        order received. A page that belongs to several of the categories
        appears once per category.
    """
    articles = []

    for category in categories:
        print(f"Fetching articles from: {category}")
        cmcontinue = None
        while True:
            data = get_category_members(category, cmcontinue=cmcontinue)
            articles.extend(data.get("query", {}).get("categorymembers", []))
            # The API signals "more results" through a continuation token.
            cmcontinue = data.get("continue", {}).get("cmcontinue")
            if not cmcontinue:
                break

    return articles

# Gather the member entries of every configured musician category, then report
# how many entries were returned.
musicians = get_music_articles(categories_musicians)
musician_total = len(musicians)
print(musician_total)

Fetching articles from: Kategori:Sangere fra Danmark
Fetching articles from: Kategori:Musikere fra Danmark
Fetching articles from: Kategori:Rappere fra Danmark
Fetching articles from: Kategori:Rockmusikere fra Danmark
Fetching articles from: Kategori:Bluesmusikere fra Danmark
Fetching articles from: Kategori:Folkemusikere fra Danmark
Fetching articles from: Kategori:Heavy metal-musikere fra Danmark
Fetching articles from: Kategori:Hiphoppere fra Danmark
Fetching articles from: Kategori:Jazzmusikere fra Danmark
Fetching articles from: Kategori:Jazzkomponister fra Danmark
Fetching articles from: Kategori:Jazzpianister fra Danmark
Fetching articles from: Kategori:Klassiske musikere fra Danmark
Fetching articles from: Kategori:Klassiske pianister fra Danmark
Fetching articles from: Kategori:Popmusikere fra Danmark
Fetching articles from: Kategori:Elektroniske musikere fra Danmark
Fetching articles from: Kategori:Countrymusikere fra Danmark
Fetching articles from: Kategori:Sangskrivere fra 

In [6]:
# List the title of every collected entry, one per line.
for musician in musicians:
    title = musician["title"]
    print(title)

Peter Abrahamsen
Troels Abrahamsen
Al Agami
Kuku Agami
Alma Agger
Jeanett Albeck
Patrick Alexander
Alexandra af Frederiksborg
Alouise
Carl Alstrup
Alex Ambrose
Ida Ambrose
Kamilia Amélie
Bo Andersen (sanger)
Dorthe Andersen
Jacob Andersen (sanger)
Frans Andersson
Bastian Lars Andreasen
Annapurna (sanger)
Anne Grete
Annelouise
Lotte Arnsbjerg
Edward Aroutounjan
Marie Askehave
Aura (sanger)
Babou
Ove Bager
Vivi Bak
Tanne Amanda Balcells
Marcelino Ballarin
Gertrude Barrison
Nanna Barslev
Thomas Barsøe
Basim
Ellen Beck
Michel Belli
Peter Belli
Elias Bender Rønnenfelt
Ulle Bjørn Bengtsson
Søren Bernbom
Maud Bertelsen
Ole Berthelsen (sanger)
Thomas Bickham
Viggo Bielefeldt
Pedro Biker
Katrine Bille
Mogens Binderup
Jesper Binzer
Jodle Birge
Michelle Birkballe
Lars Bisgaard
Jonas Bjerre (sanger)
Sys Bjerre
Katinka Bjerregaard
Anette Blegvad
Anders Blichfeldt
Helene Blum
Hanne Boel
Christina Boelskifte
Axel Boisen
Sofie Bonde
Stine Bramsen
Christopher Brandt
Kalle Brandt
Niels Brandt
Jacob Bred

In [7]:
# MediaWiki Action API endpoint of the Danish-language Wikipedia (da.wikipedia.org).
WIKI_API_URL = "https://da.wikipedia.org/w/api.php"

def get_article_content(title):
    """Fetch the introduction of a Wikipedia article.

    The ``extracts`` property is requested with ``exintro`` set, so only the
    section before the first heading is returned. The extract is HTML markup
    (``<p>``, ``<b>``, ...), not plain text.

    Args:
        title: Title of the article to look up.

    Returns:
        The ``extract`` of the first page in the response, or the string
        "No content available." when the page has no extract or the response
        contains no pages at all.
    """
    params = {
        "action": "query",
        "prop": "extracts",
        "titles": title,
        "exintro": True,  # Only fetch the intro section
        "format": "json"
    }
    response = requests.get(WIKI_API_URL, params=params)
    pages = response.json().get("query", {}).get("pages", {})

    # A single title is requested, so the first page entry is the one wanted.
    # Falling back to an empty dict gives the same placeholder text for an
    # empty response as for a page without an extract, instead of None.
    first_page = next(iter(pages.values()), {})
    return first_page.get("extract", "No content available.")
    
def get_article_links(title):
    """Fetch all internal Wikipedia links of an article.

    The ``links`` property is returned in batches. Whenever a response carries
    a ``continue`` object, its values are merged into the next request, so
    articles with more links than fit in one batch are returned in full.

    Args:
        title: Title of the article whose outgoing links are wanted.

    Returns:
        A list of the titles of the linked pages, in the order received.
        Empty when the article has no links or does not exist.
    """
    params = {
        "action": "query",
        "prop": "links",
        "titles": title,
        "pllimit": "max",  # Largest batch size the API allows per request
        "format": "json"
    }

    links = []
    while True:
        data = requests.get(WIKI_API_URL, params=params).json()

        pages = data.get("query", {}).get("pages", {})
        for page_info in pages.values():
            links.extend(link["title"] for link in page_info.get("links", []))

        # No continuation object means the last batch has been received.
        continuation = data.get("continue")
        if not continuation:
            break
        params.update(continuation)

    return links

In [8]:
article_title = "Metallica"  # Replace with any band or artist
content = get_article_content(article_title)
links = get_article_links(article_title)

print(f"Article Content for {article_title}:\n", content)
print("\nLinks in the Article:")
for link in links:
    print("-", link)

Article Content for Metallica:
 <p><b>Metallica</b> er et amerikansk heavy metal-band, som blev dannet i 1981 under dyb inspiration af deres idoler Iron Maiden i Los Angeles i Californien. Metallica bestod oprindeligt af trommeslager og stifter af bandet, Lars Ulrich, rytmeguitarist og vokalist James Hetfield, lead guitarist Dave Mustaine og bassist Ron McGovney. De to sidste blev smidt ud af bandet til fordel for Kirk Hammett og Cliff Burton. Fyringen af Mustaine resulterede i en kamp mellem ham og Metallica. I september 1986 kom Metallicas turnébus ud af kontrol og væltede, så Burton blev dræbt under bussen. Jason Newsted afløste ham cirka to måneder senere. Newsted forlod bandet i 2001 og blev erstattet af Robert Trujillo i 2003.
</p><p>Metallicas tidlige udgivelser indeholdt tempofyldte, instrumentale og aggressive numre, der placerede dem som en af de fire store af thrash metal-genren sammen med Slayer, Megadeth (som blev oprettet af eks-Metallicamedlem Dave Mustaine i 1985) og An

In [9]:
import requests

# MediaWiki Action API endpoint of the Danish-language Wikipedia (da.wikipedia.org).
WIKI_API_URL = "https://da.wikipedia.org/w/api.php"
# Action API endpoint of Wikidata (www.wikidata.org), used for entity lookups.
WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"

def get_wikidata_id(title):
    """Look up the Wikidata item ID linked to a Wikipedia article.

    Args:
        title: Title of the Wikipedia article.

    Returns:
        The ``wikibase_item`` page property of the first page in the response,
        or None when the response has no pages or the page has no such
        property.
    """
    query = {
        "action": "query",
        "titles": title,
        "prop": "pageprops",
        "format": "json"
    }
    payload = requests.get(WIKI_API_URL, params=query).json()
    page_map = payload.get("query", {}).get("pages", {})

    if not page_map:
        return None

    # Only the first page entry of the response is inspected.
    first_page = next(iter(page_map.values()))
    return first_page.get("pageprops", {}).get("wikibase_item")

def get_genre_from_wikidata(wikidata_id):
    """Fetch the genre labels of a Wikidata entity.

    Reads the entity's P136 (genre) statements and resolves each referenced
    item to its label through ``get_label_from_wikidata`` (one extra request
    per genre).

    Args:
        wikidata_id: Wikidata item ID of the entity, e.g. as returned by
            ``get_wikidata_id``.

    Returns:
        A list of genre labels in statement order. Empty when the entity has
        no genre statements or is absent from the response.
    """
    params = {
        "action": "wbgetentities",
        "ids": wikidata_id,
        "props": "claims",
        "format": "json"
    }
    response = requests.get(WIKIDATA_API_URL, params=params).json()

    claims = response.get("entities", {}).get(wikidata_id, {}).get("claims", {})
    genres = []

    for statement in claims.get("P136", []):  # P136 is the genre property
        try:
            genre_id = statement["mainsnak"]["datavalue"]["value"]["id"]
        except KeyError:
            # Statements without a concrete item value carry no datavalue;
            # skip them, as get_members_from_wikidata does, rather than
            # aborting the whole lookup.
            continue
        genres.append(get_label_from_wikidata(genre_id))

    return genres

def get_label_from_wikidata(entity_id):
    """Return the Danish label of a Wikidata entity.

    Args:
        entity_id: Wikidata entity ID to look up.

    Returns:
        The ``value`` of the entity's "da" label, or "Unknown" when the
        entity, its labels, or the Danish label is missing from the response.
    """
    query = {
        "action": "wbgetentities",
        "ids": entity_id,
        "props": "labels",
        "languages": "da",  # Get label in Danish
        "format": "json"
    }
    payload = requests.get(WIKIDATA_API_URL, params=query).json()

    entity = payload.get("entities", {}).get(entity_id, {})
    danish_label = entity.get("labels", {}).get("da", {})
    return danish_label.get("value", "Unknown")

# Example: look up the genres of one musician or band
article_title = "Metallica"  # Replace with any band or artist
wikidata_id = get_wikidata_id(article_title)

if not wikidata_id:
    print(f"Wikidata ID not found for {article_title}.")
else:
    genres = get_genre_from_wikidata(wikidata_id)
    genre_summary = ", ".join(genres) if genres else "Not found"
    print(f"Genres for {article_title}: {genre_summary}")

Genres for Metallica: thrash metal, heavy metal


In [10]:
import requests

# MediaWiki Action API endpoint of the Danish-language Wikipedia (da.wikipedia.org).
WIKI_API_URL = "https://da.wikipedia.org/w/api.php"
# Action API endpoint of Wikidata (www.wikidata.org), used for entity lookups.
WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"

# NOTE(review): a function with this name and the same logic is already defined
# in an earlier cell; this definition shadows it when the cell runs.
def get_wikidata_id(title):
    """Return the Wikidata item ID attached to a Wikipedia article.

    Args:
        title: Title of the Wikipedia article.

    Returns:
        The ``wikibase_item`` page property of the first page in the response,
        or None when there are no pages or the property is absent.
    """
    request_params = {
        "action": "query",
        "titles": title,
        "prop": "pageprops",
        "format": "json"
    }
    result = requests.get(WIKI_API_URL, params=request_params).json()

    page_entries = list(result.get("query", {}).get("pages", {}).values())
    if page_entries:
        return page_entries[0].get("pageprops", {}).get("wikibase_item")
    return None

def get_members_from_wikidata(wikidata_id):
    """Return the labels of the items listed as parts of a Wikidata entity.

    Reads the entity's P527 statements and resolves each referenced item to
    its label through ``get_label_from_wikidata``.

    Args:
        wikidata_id: Wikidata item ID of the band or group.

    Returns:
        A list of member labels in statement order. Statements that raise a
        KeyError while being read are skipped; the list is empty when the
        entity has no P527 statements.
    """
    query = {
        "action": "wbgetentities",
        "ids": wikidata_id,
        "props": "claims",
        "format": "json"
    }
    payload = requests.get(WIKIDATA_API_URL, params=query).json()

    entity = payload.get("entities", {}).get(wikidata_id, {})
    member_names = []

    # P527 = has part / member
    for statement in entity.get("claims", {}).get("P527", []):
        try:
            member_names.append(
                get_label_from_wikidata(
                    statement["mainsnak"]["datavalue"]["value"]["id"]
                )
            )
        except KeyError:
            # Best effort: a statement without the expected keys is ignored.
            continue

    return member_names

# NOTE(review): a function with this name and the same logic is already defined
# in an earlier cell; this definition shadows it when the cell runs.
def get_label_from_wikidata(entity_id):
    """Look up the Danish label of a Wikidata entity.

    Args:
        entity_id: Wikidata entity ID to look up.

    Returns:
        The ``value`` of the entity's "da" label, or "Unknown" when any level
        of the expected response structure is missing.
    """
    request_params = {
        "action": "wbgetentities",
        "ids": entity_id,
        "props": "labels",
        "languages": "da",  # Danish labels
        "format": "json"
    }
    result = requests.get(WIKIDATA_API_URL, params=request_params).json()

    # Walk entities -> <id> -> labels -> da, substituting {} for missing levels.
    node = result
    for key in ("entities", entity_id, "labels", "da"):
        node = node.get(key, {})
    return node.get("value", "Unknown")

# Example: look up the members of one music group
article_title = "Baby in Vain"  # Replace with any group
wikidata_id = get_wikidata_id(article_title)

if not wikidata_id:
    print(f"Wikidata ID not found for {article_title}.")
else:
    members = get_members_from_wikidata(wikidata_id)
    member_summary = ", ".join(members) if members else "Not found"
    print(f"Members of {article_title}: {member_summary}")

Members of Baby in Vain: Lola Hammerich, Benedicte Pierleoni, Andrea Thuesen
