In [7]:
import requests
import pandas as pd
import osmnx as ox
import geopandas as gpd

In [8]:
# SPARQL query to fetch monuments in Poland
query = """
SELECT ?item ?itemLabel ?location ?coordinates ?inception ?description WHERE {
  ?item wdt:P31 wd:Q4989906. # Instance of 'cultural heritage monument'
  ?item wdt:P17 wd:Q36.      # Located in Poland
  OPTIONAL { ?item wdt:P625 ?coordinates. } # Coordinates
  OPTIONAL { ?item wdt:P276 ?location. }    # Location
  OPTIONAL { ?item wdt:P571 ?inception. }   # Year built
  OPTIONAL { ?item schema:description ?description. FILTER (LANG(?description) = "en") }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

# Wikidata SPARQL endpoint
url = "https://query.wikidata.org/sparql"
response = requests.get(url, params={"query": query, "format": "json"})
data = response.json()

In [9]:
monuments = data["results"]["bindings"]
df_wikidata = pd.DataFrame([
    {
        "name": item["itemLabel"]["value"],
        "coordinates": item.get("coordinates", {}).get("value", None),
        "location": item.get("location", {}).get("value", None),
        "year_built": item.get("inception", {}).get("value", None),
        "description": item.get("description", {}).get("value", None)
    }
    for item in monuments
])

In [10]:
# Split coordinates into latitude and longitude
df_wikidata[["latitude", "longitude"]] = df_wikidata["coordinates"].str.extract(r'Point\((.*?) (.*?)\)')
df_wikidata["latitude"] = pd.to_numeric(df_wikidata["latitude"])
df_wikidata["longitude"] = pd.to_numeric(df_wikidata["longitude"])

# Drop rows without coordinates
df_wikidata.dropna(subset=["latitude", "longitude"], inplace=True)

# Preview the data
print(df_wikidata.head())

                                name                 coordinates location  \
1                          Q11823240      Point(16.9472 52.4127)     None   
2  Jan Kochanowski monument in Radom  Point(21.158056 51.400389)     None   
3    John Paul II Monument in Kalisz       Point(18.0935 51.763)     None   
4    John Paul II Monument in Leszno      Point(16.5924 51.8472)     None   
5                          Q11823244      Point(19.9966 50.0946)     None   

  year_built                 description   latitude  longitude  
1       None  monument in Poznań, Poland  16.947200  52.412700  
2       None          monument in Poland  21.158056  51.400389  
3       None  monument in Kalisz, Poland  18.093500  51.763000  
4       None  monument in Leszno, Poland  16.592400  51.847200  
5       None  monument in Kraków, Poland  19.996600  50.094600  


In [11]:
# Define the place and tags
tags = {"historic": True}

# Query using osmnx
gdf_osm = ox.geometries.geometries_from_place("Poland", tags)

# Reset the index and extract relevant columns
gdf_osm = gdf_osm.reset_index()[["name", "geometry"]]

# Extract latitude and longitude
gdf_osm["latitude"] = gdf_osm.geometry.y
gdf_osm["longitude"] = gdf_osm.geometry.x

# Drop rows without names
gdf_osm = gdf_osm.dropna(subset=["name"])

# Preview the data
print(gdf_osm.head())

AttributeError: 'GeoDataFrame' object has no attribute 'append'

In [None]:
# Combine datasets
df_combined = pd.concat([df_wikidata, df_osm], ignore_index=True)

# Drop duplicates by name and coordinates
df_combined = df_combined.drop_duplicates(subset=["name", "latitude", "longitude"])

# Fill missing values in key columns (if applicable)
df_combined["description"].fillna("No description available", inplace=True)

# Save the dataset to CSV
df_combined.to_csv("poland_monuments_dataset.csv", index=False)

# Preview combined dataset
print(df_combined.head())