## Alle Gepris Projekte in einer Liste

In [13]:
import json
import os

# Pfad zur JSON-Datei (relativ zum aktuellen Verzeichnis)
json_file_path = "output_all_persons_names_projects_links.json"

# Menge zur Speicherung der eindeutigen Projekt-IDs
project_ids_set = set()

# Vollständigen Pfad zur JSON-Datei erstellen
full_json_file_path = os.path.join(os.getcwd(), json_file_path)

# JSON-Datei einlesen und Daten laden
with open(full_json_file_path, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Durchlaufe jeden Eintrag in der Liste
for entry in data:
    # Extrahiere die Projekte aus dem Eintrag
    projects = entry.get('Projekte', [])
    
    # Durchlaufe die Projekte und extrahiere die Projekt-IDs
    for project in projects:
        project_link = project.get('Projekt Link', '')
        
        # Extrahiere die Projekt-ID aus dem Projekt-Link
        if project_link:
            project_id = project_link.split('/')[-1]
            # Füge die Projekt-ID zur Menge hinzu (automatisch wird nur eindeutige IDs behalten)
            project_ids_set.add(project_id)

# Konvertiere die Menge von Projekt-IDs in eine Liste (falls erforderlich)
project_ids_list = list(project_ids_set)

# Ausgabe der gesammelten eindeutigen Projekt-IDs
print("Liste der eindeutigen Projekt-IDs:")
print(project_ids_list)

Liste der eindeutigen Projekt-IDs:
['442032008', '5413368', '221270173', '71821268', '316102599', '18572122', '183605059', '492813820', '460129525', '433110396', '431352836', '509313233', '516780480', '5131248', '255821879', '198305071', '521476232', '445552570', '418603037', '424650015', '520751609', '505997786', '513892404', '447603908', '224619622', '424795268', '242504939', '40014398', '456668568', '491261247', '453182863', '270041755', '218318381', '5310710', '441914366', '325443116', '284237345', '5365365', '453229399', '319835486', '418004173', '314772579', '230488449', '24122740', '418598556', '290363600', '458957343', '5095944', '507302435', '326552732']


## Projekte welche bereits Wikidata einträge haben

In [37]:
import requests

for project in project_ids_list:
    api_url = f"https://www.wikidata.org/w/api.php?action=query&format=json&list=search&srsearch=P4870+{project}"

    response = requests.get(api_url)
    data = response.json()

    qids_list = []  # Liste zur Speicherung der QIDs

    if "query" in data and "search" in data["query"]:
        for result in data["query"]["search"]:
            q_id = result["title"]
            qids_list.append(q_id)  # Füge die QID zur Liste hinzu
            print(f"Q-ID for {search_term}: {q_id}")

    # Ausgabe der Liste mit QIDs
    print("List of QIDs:")
    print(qids_list)

Q-ID for 5413368: Q98380341
List of QIDs:
['Q98380341']
List of QIDs:
[]
List of QIDs:
[]
Q-ID for 5413368: Q116128332
List of QIDs:
['Q116128332']
List of QIDs:
[]
Q-ID for 5413368: Q116114558
List of QIDs:
['Q116114558']
List of QIDs:
[]
List of QIDs:
[]
Q-ID for 5413368: Q99534506
List of QIDs:
['Q99534506']
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
Q-ID for 5413368: Q116063769
List of QIDs:
['Q116063769']
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
Q-ID for 5413368: Q116121999
List of QIDs:
['Q116121999']
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
List of QIDs:
[]
Q-ID for 5413368: Q98380337
List of QIDs:
['Q98380337']
List of QIDs:
[]
List of QIDs:
[]
Q-ID for 5413368: Q116092079
List of QIDs:
['Q116092079']
List of QIDs:
[]
List of QIDs:
[

## diese in einer Liste/dict speichern mit QID und Namen

In [40]:
# Liste zur Speicherung der gefundenen Einträge
found_entries = []

for project in project_ids_list:
    api_url = f"https://www.wikidata.org/w/api.php?action=query&format=json&list=search&srsearch=P4870+{project}"

    response = requests.get(api_url)
    data = response.json()

    if "query" in data and "search" in data["query"]:
        for result in data["query"]["search"]:
            q_id = result["title"]
            label = result.get("label", "")  # Das Label des Suchergebnisses

            # Füge das gefundene Ergebnis der Liste hinzu
            found_entries.append({
                "project_id": project,
                "q_id": q_id,
                "label": label
            })

# Ausgabe der gefundenen Einträge
print("Gefundene Einträge:")
for entry in found_entries:
    print(f"Projekt-ID: {entry['project_id']}, Q-ID: {entry['q_id']}, Label: {entry['label']}")

print(found_entries)

Gefundene Einträge:
Projekt-ID: 442032008, Q-ID: Q98380341, Label: 
Projekt-ID: 71821268, Q-ID: Q116128332, Label: 
Projekt-ID: 18572122, Q-ID: Q116114558, Label: 
Projekt-ID: 460129525, Q-ID: Q99534506, Label: 
Projekt-ID: 5131248, Q-ID: Q116063769, Label: 
Projekt-ID: 40014398, Q-ID: Q116121999, Label: 
Projekt-ID: 441914366, Q-ID: Q98380337, Label: 
Projekt-ID: 5365365, Q-ID: Q116092079, Label: 
Projekt-ID: 5095944, Q-ID: Q116062070, Label: 
[{'project_id': '442032008', 'q_id': 'Q98380341', 'label': ''}, {'project_id': '71821268', 'q_id': 'Q116128332', 'label': ''}, {'project_id': '18572122', 'q_id': 'Q116114558', 'label': ''}, {'project_id': '460129525', 'q_id': 'Q99534506', 'label': ''}, {'project_id': '5131248', 'q_id': 'Q116063769', 'label': ''}, {'project_id': '40014398', 'q_id': 'Q116121999', 'label': ''}, {'project_id': '441914366', 'q_id': 'Q98380337', 'label': ''}, {'project_id': '5365365', 'q_id': 'Q116092079', 'label': ''}, {'project_id': '5095944', 'q_id': 'Q116062070', 

In [44]:
found_entries = []

for project in project_ids_list:
    api_url = f"https://www.wikidata.org/w/api.php?action=query&format=json&list=search&srsearch=P4870+{project}"

    response = requests.get(api_url)
    data = response.json()

    if "query" in data and "search" in data["query"]:
        for result in data["query"]["search"]:
            q_id = result["title"]

            # Weitere Abfrage, um das Label des Wikidata-Eintrags abzurufen
            entity_url = f"https://www.wikidata.org/wiki/Special:EntityData/{q_id}.json"
            entity_response = requests.get(entity_url)
            entity_data = entity_response.json()

            if "entities" in entity_data and q_id in entity_data["entities"]:
                entity_info = entity_data["entities"][q_id]
                label = entity_info.get('labels', {}).get('en', {}).get('value', '')  # Das Label des Eintrags

                # Füge das gefundene Ergebnis der Liste hinzu
                found_entries.append({
                    "project_id": project,
                    "q_id": q_id,
                    "label": label
                })

# Ausgabe der gefundenen Einträge
print("Gefundene Einträge:")
for entry in found_entries:
    print(f"Projekt-ID: {entry['project_id']}, Q-ID: {entry['q_id']}, Label: {entry['label']}")
print(found_entries)

Gefundene Einträge:
Projekt-ID: 442032008, Q-ID: Q98380341, Label: NFDI4Biodiversity
Projekt-ID: 71821268, Q-ID: Q116128332, Label: 
Projekt-ID: 18572122, Q-ID: Q116114558, Label: 
Projekt-ID: 460129525, Q-ID: Q99534506, Label: NFDI4Microbiota
Projekt-ID: 5131248, Q-ID: Q116063769, Label: 
Projekt-ID: 40014398, Q-ID: Q116121999, Label: 
Projekt-ID: 441914366, Q-ID: Q98380337, Label: GHGA
Projekt-ID: 5365365, Q-ID: Q116092079, Label: 
Projekt-ID: 5095944, Q-ID: Q116062070, Label: 
[{'project_id': '442032008', 'q_id': 'Q98380341', 'label': 'NFDI4Biodiversity'}, {'project_id': '71821268', 'q_id': 'Q116128332', 'label': ''}, {'project_id': '18572122', 'q_id': 'Q116114558', 'label': ''}, {'project_id': '460129525', 'q_id': 'Q99534506', 'label': 'NFDI4Microbiota'}, {'project_id': '5131248', 'q_id': 'Q116063769', 'label': ''}, {'project_id': '40014398', 'q_id': 'Q116121999', 'label': ''}, {'project_id': '441914366', 'q_id': 'Q98380337', 'label': 'GHGA'}, {'project_id': '5365365', 'q_id': 'Q11

## Alle artikel ohne eintrag in einer Liste Speichern

In [52]:
# Extrahiere die Projekt-IDs aus den gefundenen Einträgen
found_project_ids = {entry['project_id'] for entry in found_entries}

# Erstelle eine neue Liste, die nur die Projekt-IDs enthält, die nicht in den gefundenen Einträgen sind
non_existing_entries = [project_id for project_id in project_ids_list if project_id not in found_project_ids]

# Ausgabe der gefilterten Liste
print("Gefilterte Projekt-IDs:")
print(non_existing_entries)


Gefilterte Projekt-IDs:
['5413368', '221270173', '316102599', '183605059', '492813820', '433110396', '431352836', '509313233', '516780480', '255821879', '198305071', '521476232', '445552570', '418603037', '424650015', '520751609', '505997786', '513892404', '447603908', '224619622', '424795268', '242504939', '456668568', '491261247', '453182863', '270041755', '218318381', '5310710', '325443116', '284237345', '453229399', '319835486', '418004173', '314772579', '230488449', '24122740', '418598556', '290363600', '458957343', '507302435', '326552732']
