In [1]:
import polars as pl

In [2]:
# Load the raw dataset from its JSON file into a polars DataFrame.
papezstvo_path = 'data_raw/papezstvo_raw.json'
df = pl.read_json(papezstvo_path)

In [3]:
# Distinct, non-null birthplace IDs from the 'narozeni_misto' column.
# maintain_order=True keeps first-seen order: polars' unique() otherwise gives
# no ordering guarantee, which would make this list differ between runs.
mista_narozeni = (
    df.select(pl.col('narozeni_misto'))
    .drop_nulls()
    .unique(maintain_order=True)
    .to_series()
    .to_list()
)
mista_narozeni

['Q200441',
 'Q803',
 'Q5489',
 'Q643907',
 'Q6662',
 'Q2277',
 'Q2802461',
 'Q660355',
 'Q55015',
 'Q272897',
 'Q589578',
 'Q99941',
 'Q693973',
 'Q56071',
 'Q23253',
 'Q831022',
 'Q2759',
 'Q128089',
 'Q873126',
 'Q243168',
 'Q220',
 'Q463459',
 'Q124067',
 'Q3833391',
 'Q13437',
 'Q46787',
 'Q161685',
 'Q1438',
 'Q41',
 'Q159605',
 'Q1297',
 'Q278199',
 'Q83113',
 'Q111954',
 'Q6259',
 'Q159678',
 'Q124925',
 'Q181238',
 'Q40219',
 'Q2969360',
 'Q2634',
 'Q5475',
 'Q191115',
 'Q18493402',
 'Q53216',
 'Q13142',
 'Q954684',
 'Q20413',
 'Q199160',
 'Q270613',
 'Q50157',
 'Q2044',
 'Q641',
 'Q1308',
 'Q13375',
 'Q3995920',
 'Q47611',
 'Q270492',
 'Q212856',
 'Q116404',
 'Q54511',
 'Q242675',
 'Q40588',
 'Q42975',
 'Q490',
 'Q597',
 'Q101616',
 'Q1449',
 'Q1218',
 'Q243024',
 'Q73649',
 'Q501773',
 'Q241733',
 'Q528042',
 'Q128084',
 'Q1460',
 'Q1231483',
 'Q1048669',
 'Q833271',
 'Q1891',
 'Q688485',
 'Q980',
 'Q6558',
 'Q2656',
 'Q13670',
 'Q91341',
 'Q53952',
 'Q1330965',
 'Q779478',


In [4]:
import requests

In [5]:
def _fetch_wikidata_entities(http, entity_id, props, timeout):
    """Call the Wikidata ``wbgetentities`` API for one ID and return the decoded JSON."""
    response = http.get(
        "https://www.wikidata.org/w/api.php",
        params={
            "action": "wbgetentities",
            "ids": entity_id,
            "format": "json",
            "props": props,
        },
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return response.json()


def _best_claim_value(claims, property_id):
    """Return the value of the best-ranked statement for ``property_id``, or None."""
    fallback = None
    for claim in claims.get(property_id, []):
        mainsnak = claim.get("mainsnak", {})
        # "unknown value" / "no value" snaks carry no datavalue.
        if "datavalue" not in mainsnak:
            continue
        rank = claim.get("rank", "normal")
        if rank == "deprecated":
            continue
        value = mainsnak["datavalue"]["value"]
        # A preferred statement always wins over normal-ranked ones.
        if rank == "preferred":
            return value
        if fallback is None:
            fallback = value
    return fallback


def get_wikidata_info(wikidata_id, session=None, timeout=30):
    """
    Get coordinates and country information for a Wikidata ID.

    Args:
        wikidata_id (str | int): Wikidata ID (e.g., 'Q237'); a bare number
            such as '237' is also accepted and gets the 'Q' prefix added.
        session: Optional object exposing a ``get(url, params=..., timeout=...)``
            method (e.g. a ``requests.Session``). Defaults to the ``requests``
            module itself.
        timeout (float): Seconds to wait for each HTTP request.

    Returns:
        dict: ``wikidata_id`` and ``name`` (English label, or 'Unknown'), plus,
        when available, ``coordinates`` (``latitude``/``longitude`` from P625),
        ``country_id`` (from P17) and ``country_name``. If the entity does not
        exist the dict contains ``wikidata_id`` and ``error`` instead.

    Raises:
        Whatever ``raise_for_status()`` of the HTTP response raises
        (``requests.HTTPError`` with the default client) on an HTTP error
        status, and the client's own connection/timeout exceptions.
    """
    # Add the 'Q' prefix when the caller passed only the numeric part.
    wikidata_id = str(wikidata_id)
    if not wikidata_id.startswith('Q'):
        wikidata_id = f'Q{wikidata_id}'

    http = requests if session is None else session

    data = _fetch_wikidata_entities(http, wikidata_id, "claims|labels", timeout)
    entity = data.get("entities", {}).get(wikidata_id)
    # For unknown IDs the API still returns an entry, flagged with a "missing" key.
    if entity is None or "missing" in entity:
        return {"wikidata_id": wikidata_id, "error": "Entity not found"}

    claims = entity.get("claims", {})

    result = {
        "wikidata_id": wikidata_id,
        "name": entity.get("labels", {}).get("en", {}).get("value", "Unknown")
    }

    # Get coordinates (P625)
    coordinates = _best_claim_value(claims, "P625")
    if coordinates is not None:
        result["coordinates"] = {
            "latitude": coordinates["latitude"],
            "longitude": coordinates["longitude"]
        }

    # Get country (P17); rank-aware so a preferred statement beats other entries.
    country = _best_claim_value(claims, "P17")
    if country is not None:
        country_id = country["id"]
        result["country_id"] = country_id

        # The claim only holds the country's ID; its label needs a second request.
        country_data = _fetch_wikidata_entities(http, country_id, "labels", timeout)
        country_entity = country_data.get("entities", {}).get(country_id)
        if country_entity is not None:
            result["country_name"] = (
                country_entity.get("labels", {}).get("en", {}).get("value", "Unknown")
            )

    return result

In [6]:
get_wikidata_info('Q3995920')

{'wikidata_id': 'Q3995920',
 'name': 'Tossignano',
 'coordinates': {'latitude': 44.272222222222, 'longitude': 11.603611111111},
 'country_id': 'Q38',
 'country_name': 'Italy'}

In [7]:
# Look up every birthplace ID on Wikidata; one result dict per ID, in list order.
mista = [get_wikidata_info(misto_id) for misto_id in mista_narozeni]

In [8]:
import json

In [9]:
import os

In [10]:
# Serialize the collected place records as JSON into the raw-data folder.
export_path = os.path.join('data_raw', 'mista_narozeni.json')
with open(export_path, 'w+', encoding='utf-8') as export:
    json.dump(mista, export)