### Imports

In [1]:
########################### DataSet ########################### 
# https://github.com/cvdfoundation/google-landmark?tab=readme-ov-file#release-history
###############################################################

import os
import pandas as pd
import hashlib
import requests
import tarfile
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

base_dir = Path(os.getcwd()).resolve().parent
base_dir

WindowsPath('C:/Users/diogo/Desktop/APVC/APVC-ProjetoFinal')

In [2]:
data_dir = base_dir / 'data'
data_dir.mkdir(parents=True, exist_ok=True)

## 1. Recolher os dados

In [3]:
dataLandMarkTrain_dir = data_dir / 'land_mark' / 'train'
dataLandMarkTrain_dir.mkdir(parents=True, exist_ok=True)

urls = [
    "https://s3.amazonaws.com/google-landmark/metadata/train.csv",
    "https://s3.amazonaws.com/google-landmark/metadata/train_clean.csv",
    "https://s3.amazonaws.com/google-landmark/metadata/train_attribution.csv",
    "https://s3.amazonaws.com/google-landmark/metadata/train_label_to_category.csv",
    "https://s3.amazonaws.com/google-landmark/metadata/train_label_to_hierarchical.csv"
]

# Download dos ficheiros apenas se não existirem
for u in urls:
    file_name = Path(u).name
    save_path = dataLandMarkTrain_dir / file_name

    if save_path.exists():
        print(f"Já existe: {save_path}")
        continue

    response = requests.get(u)
    response.raise_for_status()

    with open(save_path, "wb") as f:
        f.write(response.content)

    print(f"Guardado: {save_path}")


Já existe: C:\Users\diogo\Desktop\APVC\APVC-ProjetoFinal\data\land_mark\train\train.csv
Já existe: C:\Users\diogo\Desktop\APVC\APVC-ProjetoFinal\data\land_mark\train\train_clean.csv
Já existe: C:\Users\diogo\Desktop\APVC\APVC-ProjetoFinal\data\land_mark\train\train_attribution.csv
Já existe: C:\Users\diogo\Desktop\APVC\APVC-ProjetoFinal\data\land_mark\train\train_label_to_category.csv
Já existe: C:\Users\diogo\Desktop\APVC\APVC-ProjetoFinal\data\land_mark\train\train_label_to_hierarchical.csv


Vou utilizar o **train_clean.csv** como base, já que este possui imagens mais fiáveis quando comparado com **train.csv**.

## 2. Merge dos DataSets

In [4]:
def colunas_em_comum(df1, df2):
    """
    Recebe dois DataFrames e devolve o conjunto de colunas em comum.
    """
    return set(df1.columns) & set(df2.columns)

### 2.1. Train_clean.csv - DataSet Base

In [5]:
train_clean = pd.read_csv(dataLandMarkTrain_dir / "train_clean.csv")
train_clean["images"] = train_clean["images"].apply(lambda x: x.split() if pd.notnull(x) else [])
train_clean.head()

Unnamed: 0,landmark_id,images
0,1,"[17660ef415d37059, 92b6290d571448f6, cd41bf948..."
1,7,"[25c9dfc7ea69838d, 28b13f94a6f1f3c1, 307d6584f..."
2,9,"[0193b65bb58d2c77, 1a30a51a287ecf69, 1f4e8ab1f..."
3,11,"[1a6cb1deed46bb17, 1cc2c8fbc83e1a0c, 2361b8da8..."
4,12,"[0a199c97c382b1ff, 1492a5d344495391, 290097bd3..."


A coluna `images` corresponde ao **`id`** do *train.csv*

### 2.2. train_label_to_hierarchical.csv - Filtrar pelas categorias

In [6]:
train_label_to_hierarchical = pd.read_csv(dataLandMarkTrain_dir / "train_label_to_hierarchical.csv")
train_label_to_hierarchical.head()

Unnamed: 0,landmark_id,category,supercategory,hierarchical_label,natural_or_human_made
0,0,http://commons.wikimedia.org/wiki/Category:Hap...,horse racing venue,sports venue,human-made
1,1,http://commons.wikimedia.org/wiki/Category:Lui...,park,parks,natural
2,2,http://commons.wikimedia.org/wiki/Category:Gra...,mountain,mountain,natural
3,5,http://commons.wikimedia.org/wiki/Category:Lak...,motorsport racing track,road,human-made
4,7,http://commons.wikimedia.org/wiki/Category:Spa...,multi-purpose hall,,


In [7]:
mergeWith = colunas_em_comum(train_clean, train_label_to_hierarchical)
train_clean = train_clean.merge(train_label_to_hierarchical, on=list(mergeWith), how='left')
train_clean.head()

Unnamed: 0,landmark_id,images,category,supercategory,hierarchical_label,natural_or_human_made
0,1,"[17660ef415d37059, 92b6290d571448f6, cd41bf948...",http://commons.wikimedia.org/wiki/Category:Lui...,park,parks,natural
1,7,"[25c9dfc7ea69838d, 28b13f94a6f1f3c1, 307d6584f...",http://commons.wikimedia.org/wiki/Category:Spa...,multi-purpose hall,,
2,9,"[0193b65bb58d2c77, 1a30a51a287ecf69, 1f4e8ab1f...",,,,
3,11,"[1a6cb1deed46bb17, 1cc2c8fbc83e1a0c, 2361b8da8...",http://commons.wikimedia.org/wiki/Category:Mer...,market hall,market,human-made
4,12,"[0a199c97c382b1ff, 1492a5d344495391, 290097bd3...",http://commons.wikimedia.org/wiki/Category:Was...,architectural structure,,


### 2.3. train.csv - Retirar os links das imagens

In [8]:
train = pd.read_csv(dataLandMarkTrain_dir / "train.csv")
train["url"] = train["url"].apply(lambda x: x.split() if pd.notnull(x) else [])
train.head()

Unnamed: 0,id,url,landmark_id
0,6e158a47eb2ca3f6,[https://upload.wikimedia.org/wikipedia/common...,142820
1,202cd79556f30760,[http://upload.wikimedia.org/wikipedia/commons...,104169
2,3ad87684c99c06e1,[http://upload.wikimedia.org/wikipedia/commons...,37914
3,e7f70e9c61e66af3,[https://upload.wikimedia.org/wikipedia/common...,102140
4,4072182eddd0100e,[https://upload.wikimedia.org/wikipedia/common...,2474


In [9]:
id_to_url = dict(zip(train['id'], train['url']))

def ids_para_urls(lista_ids):
    if not isinstance(lista_ids, list):
        return []
    urls = []
    for i in lista_ids:
        url = id_to_url.get(i)
        if url:
            if isinstance(url, list):
                urls.extend(url)  # adiciona todos os urls da lista
            else:
                urls.append(url)  # adiciona url único
    return urls

# Aplicar à coluna 'images' do train_clean e criar nova coluna 'urls'
train_clean['urls'] = train_clean['images'].apply(ids_para_urls)
train_clean

Unnamed: 0,landmark_id,images,category,supercategory,hierarchical_label,natural_or_human_made,urls
0,1,"[17660ef415d37059, 92b6290d571448f6, cd41bf948...",http://commons.wikimedia.org/wiki/Category:Lui...,park,parks,natural,[http://upload.wikimedia.org/wikipedia/commons...
1,7,"[25c9dfc7ea69838d, 28b13f94a6f1f3c1, 307d6584f...",http://commons.wikimedia.org/wiki/Category:Spa...,multi-purpose hall,,,[https://upload.wikimedia.org/wikipedia/common...
2,9,"[0193b65bb58d2c77, 1a30a51a287ecf69, 1f4e8ab1f...",,,,,[https://upload.wikimedia.org/wikipedia/common...
3,11,"[1a6cb1deed46bb17, 1cc2c8fbc83e1a0c, 2361b8da8...",http://commons.wikimedia.org/wiki/Category:Mer...,market hall,market,human-made,[https://upload.wikimedia.org/wikipedia/common...
4,12,"[0a199c97c382b1ff, 1492a5d344495391, 290097bd3...",http://commons.wikimedia.org/wiki/Category:Was...,architectural structure,,,[https://upload.wikimedia.org/wikipedia/common...
...,...,...,...,...,...,...,...
81308,203083,"[1def5ad0872c6303, 23349c63e48a7de2, 2c9d17eeb...",http://commons.wikimedia.org/wiki/Category:St....,parish church,church,human-made,[https://upload.wikimedia.org/wikipedia/common...
81309,203085,"[0926becdb2c92f8a, 22b58ac428531da8, 2c9054957...",http://commons.wikimedia.org/wiki/Category:Chu...,church building,church,human-made,[https://upload.wikimedia.org/wikipedia/common...
81310,203087,"[146cc06310d08ef0, 1ee045e5a3bc9568, 389594711...",http://commons.wikimedia.org/wiki/Category:Jac...,park,parks,natural,[https://upload.wikimedia.org/wikipedia/common...
81311,203091,"[8e219a79ee5eede9, fa7142e44850dbac]",http://commons.wikimedia.org/wiki/Category:Sil...,landscape park of Poland,parks,natural,[https://upload.wikimedia.org/wikipedia/common...


In [10]:
pd.set_option('display.max_colwidth', None)
train_clean["urls"]

0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

## 3. Filtrar os dados 

In [None]:
del train_label_to_hierarchical
del train

### 3.1. Classe da foto

In [11]:
pd.reset_option('display.max_colwidth')
train_clean["supercategory"].unique()

array(['park', 'multi-purpose hall', nan, ...,
       'ancient Roman architecture', 'common land',
       'Naturschutzgebiet (NSG HA 191)'], dtype=object)

In [12]:
counts = train_clean["supercategory"].value_counts()
counts_more_than_10 = counts[counts > 200]
print(counts_more_than_10)

supercategory
church building            11024
castle                      2058
mountain                    1520
museum                      1320
building                    1157
château                     1144
lighthouse                   889
lake                         889
architectural structure      771
monastery                    738
palace                       686
parish church                605
monument                     569
square                       565
cathedral                    547
cemetery                     543
park                         540
art museum                   523
house                        468
archaeological site          451
skyscraper                   426
bridge                       397
chapel                       391
island                       388
reservoir                    378
mosque                       368
tower                        355
Buddhist temple              355
sculpture                    332
waterfall                    

In [13]:
monumento_categorias = [
    # Tenho a certeza que são monumentos
    'church building', 'castle', 'monastery', 'palace',
    'parish church', 'monument', 'cathedral', 'chapel',
    'abbey', 'fort', 'city gate', 'Catholic cathedral',
    'castle ruin',

    # Não tenho a certeza que são monumentos
    'building', 'architectural structure', 'historic house museum',
    'cultural property', 'sculpture', 'fountain', 'tower',
    'lighthouse', 'bridge', 'square', 'city hall'
]

df_monumentos = train_clean[train_clean['supercategory'].isin(monumento_categorias)]
df_monumentos

Unnamed: 0,landmark_id,images,category,supercategory,hierarchical_label,natural_or_human_made,urls
4,12,"[0a199c97c382b1ff, 1492a5d344495391, 290097bd3...",http://commons.wikimedia.org/wiki/Category:Was...,architectural structure,,,[https://upload.wikimedia.org/wikipedia/common...
6,22,"[0be5d581f54d3116, 121754b8854c9757, 3238a06ee...",http://commons.wikimedia.org/wiki/Category:Cas...,castle,castle / fort,human-made,[http://upload.wikimedia.org/wikipedia/commons...
10,29,"[9a03db05d8fb4850, c612327c67863d9e, c6a1efedc...",http://commons.wikimedia.org/wiki/Category:St_...,church building,church,human-made,[https://upload.wikimedia.org/wikipedia/common...
15,37,"[022d4bc3b72988eb, 0d920307064d9e9d, 0e3d48fc1...",http://commons.wikimedia.org/wiki/Category:Chu...,church building,church,human-made,[http://upload.wikimedia.org/wikipedia/commons...
17,43,"[01d1a2461b033111, 0e2ccd04e9d0dc2e, 1753e0128...",http://commons.wikimedia.org/wiki/Category:Abb...,abbey,church,human-made,[https://upload.wikimedia.org/wikipedia/common...
...,...,...,...,...,...,...,...
81295,203054,"[79713986a0d603f9, a67fda97f7c7eb37, ed0efcd25...",http://commons.wikimedia.org/wiki/Category:1_D...,building,,,[https://upload.wikimedia.org/wikipedia/common...
81297,203057,"[0fccea3669efad6e, 261eb6778621f466, 2e41935f7...",http://commons.wikimedia.org/wiki/Category:Ser...,building,,,[https://upload.wikimedia.org/wikipedia/common...
81300,203063,"[113e2bed03e2a1ae, 57b5da8ecf0f8ed2, 5d4ffb9f3...",http://commons.wikimedia.org/wiki/Category:Zve...,monastery,monastery,human-made,[https://upload.wikimedia.org/wikipedia/common...
81308,203083,"[1def5ad0872c6303, 23349c63e48a7de2, 2c9d17eeb...",http://commons.wikimedia.org/wiki/Category:St....,parish church,church,human-made,[https://upload.wikimedia.org/wikipedia/common...


In [14]:
counts = df_monumentos["hierarchical_label"].value_counts(dropna=False)
print(counts)

hierarchical_label
church                 13100
NaN                     2756
castle / fort           2532
lighthouse               889
monastery                738
palace                   686
square                   565
bridge                   397
tower                    355
sculpture                332
government building      253
house                    241
gate                     207
fountain                 206
Name: count, dtype: int64


### 3.2. Human Made (Não tenho a certeza, não vou alterar)

In [15]:
counts = df_monumentos["natural_or_human_made"].value_counts(dropna=False)
print(counts)

natural_or_human_made
human-made    20501
NaN            2756
Name: count, dtype: int64


In [16]:
# Existem nulos que me parecem ser monumentos
pd.set_option('display.max_colwidth', None)
df_monumentos[df_monumentos['natural_or_human_made'].isna()]["category"]

4                      http://commons.wikimedia.org/wiki/Category:Wasserkunstanlage_Paradies_(Baden-Baden)
48                                         http://commons.wikimedia.org/wiki/Category:Shemokmedi_Cathedral
55       http://commons.wikimedia.org/wiki/Category:Est%C3%A1tua_de_D._Pedro_IV_na_Pra%C3%A7a_da_Liberdade
80                                   http://commons.wikimedia.org/wiki/Category:Quedlinburger_Stadtschloss
188                                               http://commons.wikimedia.org/wiki/Category:Valhallabadet
                                                       ...                                                
81165                                   http://commons.wikimedia.org/wiki/Category:Slovansk%C3%BD_d%C5%AFm
81173                                http://commons.wikimedia.org/wiki/Category:High_Roller_(Ferris_wheel)
81222                                                  http://commons.wikimedia.org/wiki/Category:Walden_7
81295                              ht

## 4. API da MediaWiki - *https://m.mediawiki.org/wiki/API:Main_page*

In [20]:
pd.reset_option('display.max_colwidth')
df_monumentos.head()

Unnamed: 0,landmark_id,images,category,supercategory,hierarchical_label,natural_or_human_made,urls
4,12,"[0a199c97c382b1ff, 1492a5d344495391, 290097bd3...",http://commons.wikimedia.org/wiki/Category:Was...,architectural structure,,,[https://upload.wikimedia.org/wikipedia/common...
6,22,"[0be5d581f54d3116, 121754b8854c9757, 3238a06ee...",http://commons.wikimedia.org/wiki/Category:Cas...,castle,castle / fort,human-made,[http://upload.wikimedia.org/wikipedia/commons...
10,29,"[9a03db05d8fb4850, c612327c67863d9e, c6a1efedc...",http://commons.wikimedia.org/wiki/Category:St_...,church building,church,human-made,[https://upload.wikimedia.org/wikipedia/common...
15,37,"[022d4bc3b72988eb, 0d920307064d9e9d, 0e3d48fc1...",http://commons.wikimedia.org/wiki/Category:Chu...,church building,church,human-made,[http://upload.wikimedia.org/wikipedia/commons...
17,43,"[01d1a2461b033111, 0e2ccd04e9d0dc2e, 1753e0128...",http://commons.wikimedia.org/wiki/Category:Abb...,abbey,church,human-made,[https://upload.wikimedia.org/wikipedia/common...


In [21]:
pd.set_option('display.max_colwidth', None)
df_monumentos["category"]

4           http://commons.wikimedia.org/wiki/Category:Wasserkunstanlage_Paradies_(Baden-Baden)
6                               http://commons.wikimedia.org/wiki/Category:Castle_of_Santa_Cruz
10                                 http://commons.wikimedia.org/wiki/Category:St_Peter,_Claydon
15                   http://commons.wikimedia.org/wiki/Category:Church_of_Saint_Stephen,_Prague
17                        http://commons.wikimedia.org/wiki/Category:Abbaye_de_Caunes-Minervois
                                                  ...                                          
81295                   http://commons.wikimedia.org/wiki/Category:1_Doki_Street_in_Gda%C5%84sk
81297                    http://commons.wikimedia.org/wiki/Category:Serbian_Patriarchy_building
81300                               http://commons.wikimedia.org/wiki/Category:Zverin_Monastery
81308         http://commons.wikimedia.org/wiki/Category:St._Peter's_Parish_Church_(Radovljica)
81309    http://commons.wikimedia.org/wi

In [22]:
import requests
from urllib.parse import urlparse, unquote
pd.reset_option('display.max_colwidth')

def extrair_titulo(url):
    """Extrai o título da página Wikimedia a partir da URL"""
    path = urlparse(url).path
    titulo = path.split('/')[-1]  # Última parte da URL
    return unquote(titulo)


def obter_info_wikimedia(titulo):
    """Consulta a Wikimedia API e retorna info básica sobre o título"""
    endpoint = "https://commons.wikimedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "coordinates|pageprops|description",
        "titles": titulo,
        "format": "json",
        "formatversion": 2
    }
    headers = {
        "User-Agent": "ProjetoMonumentosPT/1.0 (teu@email.com)"
    }
    response = requests.get(endpoint, params=params, headers=headers)
    response.raise_for_status()
    return response.json()


def obter_info_wikidata(qid):
    """Consulta a Wikidata API com o QID para obter dados detalhados"""
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    response = requests.get(url)
    response.raise_for_status()
    return response.json()


def obter_label_por_qid(qid, lang="en"):
    """Consulta o rótulo de um QID (por ex. país, cidade)"""
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    response = requests.get(url)
    response.raise_for_status()
    data = response.json()
    entidade = list(data["entities"].values())[0]
    return entidade["labels"].get(lang, {}).get("value", "Desconhecido")


def extrair_detalhes_wikidata(entidade):
    """Extrai dados úteis da estrutura JSON da Wikidata"""
    claims = entidade["claims"]

    def extrair_qid(p):
        if p in claims:
            return claims[p][0]["mainsnak"]["datavalue"]["value"]["id"]
        return None

    def extrair_varios_qids(p):
        if p in claims:
            return [c["mainsnak"]["datavalue"]["value"]["id"] for c in claims[p]]
        return []

    resultado = {
        "pais_qid": extrair_qid("P17"),
        "localizacoes_qid": extrair_varios_qids("P131"),
        "instancias_qid": extrair_varios_qids("P31"),
        "label": entidade.get("labels", {}).get("en", {}).get("value", "Sem título")
    }

    return resultado


# --- Execução (exemplo) ---

url_exemplo = "https://commons.wikimedia.org/wiki/Category:Wasserkunstanlage_Paradies_(Baden-Baden)"
titulo = extrair_titulo(url_exemplo)

# 1. Obter info da Wikimedia
dados_wiki = obter_info_wikimedia(titulo)
pagina = dados_wiki["query"]["pages"][0]

coordenadas = pagina.get("coordinates", [{}])[0]
wikibase_item = pagina.get("pageprops", {}).get("wikibase_item", None)

print("📌 Exemplo de monumento extraído:")
print("🔗 URL:", url_exemplo)
print("📄 Título Wikimedia:", titulo)
print("🗺️ Coordenadas:", coordenadas)
print("🔗 Wikibase Item:", wikibase_item)

if wikibase_item:
    dados_wikidata = obter_info_wikidata(wikibase_item)
    entidade = dados_wikidata["entities"][wikibase_item]
    detalhes = extrair_detalhes_wikidata(entidade)

    print(f"\n🏛️ Nome do Monumento: {detalhes['label']}")

    nome_pais = obter_label_por_qid(detalhes["pais_qid"]) if detalhes["pais_qid"] else "Desconhecido"
    print("🌍 País:", nome_pais)

    # Mostrar todas as localizações administrativas (P131)
    print("🏘️ Localizações administrativas (P131):")
    for qid in detalhes["localizacoes_qid"]:
        nome_local = obter_label_por_qid(qid)
        print(f"   - {nome_local} (QID: {qid})")

    # Mostrar tipos (P31)
    print("🏷️ Tipos (instância de):")
    for qid in detalhes["instancias_qid"]:
        nome_tipo = obter_label_por_qid(qid)
        print(f"   - {nome_tipo} (QID: {qid})")

else:
    print("❌ Não foi possível obter o Wikidata item.")

📌 Exemplo de monumento extraído:
🔗 URL: https://commons.wikimedia.org/wiki/Category:Wasserkunstanlage_Paradies_(Baden-Baden)
📄 Título Wikimedia: Category:Wasserkunstanlage_Paradies_(Baden-Baden)
🗺️ Coordenadas: {'lat': 48.7625, 'lon': 8.25154, 'primary': True, 'globe': 'earth'}
🔗 Wikibase Item: Q2551242

🏛️ Nome do Monumento: Wasserkunstanlage Paradies (Baden-Baden)
🌍 País: Germany
🏘️ Localizações administrativas (P131):
   - Baden-Baden (QID: Q4100)
🏷️ Tipos (instância de):
   - architectural structure (QID: Q811979)
   - park (QID: Q22698)


In [None]:
from urllib.parse import urlparse, unquote
from tqdm import tqdm

# -------- Funções auxiliares --------

def extrair_titulo(url):
    path = urlparse(url).path
    titulo = path.split('/')[-1]
    return unquote(titulo)

def obter_info_wikimedia(titulo):
    endpoint = "https://commons.wikimedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "coordinates|pageprops|description",
        "titles": titulo,
        "format": "json",
        "formatversion": 2
    }
    headers = {
        "User-Agent": "ProjetoMonumentosPT/1.0 (teu@email.com)"
    }
    response = requests.get(endpoint, params=params, headers=headers)
    response.raise_for_status()
    return response.json()

def obter_info_wikidata(qid):
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    response = requests.get(url)
    response.raise_for_status()
    return response.json()

def obter_label_por_qid(qid, lang="en"):
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    response = requests.get(url)
    response.raise_for_status()
    data = response.json()
    entidade = list(data["entities"].values())[0]
    return entidade["labels"].get(lang, {}).get("value", "Desconhecido")

def extrair_detalhes_wikidata(entidade):
    claims = entidade["claims"]

    def extrair_qid(p):
        if p in claims:
            return claims[p][0]["mainsnak"]["datavalue"]["value"]["id"]
        return None

    def extrair_varios_qids(p):
        if p in claims:
            return [c["mainsnak"]["datavalue"]["value"]["id"] for c in claims[p]]
        return []

    return {
        "pais_qid": extrair_qid("P17"),
        "localizacoes_qid": extrair_varios_qids("P131"),
        "instancias_qid": extrair_varios_qids("P31"),
        "label": entidade.get("labels", {}).get("en", {}).get("value", "Sem título")
    }

# -------- Aplicar ao DataFrame --------

def processar_url(url):
    try:
        titulo = extrair_titulo(url)
        dados_wiki = obter_info_wikimedia(titulo)
        pagina = dados_wiki["query"]["pages"][0]

        coordenadas = pagina.get("coordinates", [{}])[0]
        lat = coordenadas.get("lat", None)
        lon = coordenadas.get("lon", None)
        wikibase_item = pagina.get("pageprops", {}).get("wikibase_item", None)

        if not wikibase_item:
            return pd.Series([lat, lon, None, None, None, None, None])

        dados_wikidata = obter_info_wikidata(wikibase_item)
        entidade = dados_wikidata["entities"][wikibase_item]
        detalhes = extrair_detalhes_wikidata(entidade)

        nome_monumento = detalhes["label"]

        # País (P17)
        pais_qid = detalhes["pais_qid"]
        pais = obter_label_por_qid(pais_qid) if pais_qid else None

        # Localizações administrativas (P131)
        localizacoes = [obter_label_por_qid(qid) for qid in detalhes["localizacoes_qid"]]

        # Tipos (instância de, P31)
        tipos = [obter_label_por_qid(qid) for qid in detalhes["instancias_qid"]]

        return pd.Series([
            lat,
            lon,
            nome_monumento,
            pais,
            localizacoes,
            tipos,
            wikibase_item
        ])

    except Exception as e:
        print(f"Erro ao processar {url}: {e}")
        return pd.Series([None] * 7)

# -------- Executar o código --------
tqdm.pandas()
df_monumentos[[
    'lat',
    'lon',
    'nome_monumento',
    'pais',
    'localizacoes_administrativas',
    'tipos_instancia_de',
    'wikibase_item'
]] = df_monumentos['category'].progress_apply(processar_url)


# Salvar se quiseres:
df_monumentos.to_csv("Monumentos_clean.csv", index=False)
df_monumentos.head()

100%|██████████| 20/20 [01:14<00:00,  3.75s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_monumentos_20[[
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_monumentos_20[[
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_monumentos_20[[
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

In [None]:
Pais_escolhido = "Portugal"
df_monumentos_pt = df_monumentos[df_monumentos["pais"] == Pais_escolhido]
df_monumentos_pt

Unnamed: 0,landmark_id,images,category,supercategory,hierarchical_label,natural_or_human_made,urls,lat,lon,nome_monumento,pais,localizacoes_administrativas,tipos_instancia_de,wikibase_item
6,22,"[0be5d581f54d3116, 121754b8854c9757, 3238a06ee...",http://commons.wikimedia.org/wiki/Category:Cas...,castle,castle / fort,human-made,[http://upload.wikimedia.org/wikipedia/commons...,43.3483,-8.35,Castillo de Santa Cruz,Spain,"[Liáns, Oleiros]","[castle, monument]",Q10283898
