In [3]:
import pandas as pd
import requests
import time
import random
import string

# Read the mapping table; the code below relies on its "display_name" column.
df = pd.read_csv("xcl_map.csv")

# Search labels: each display name with its underscores turned into spaces.
names = [label.replace("_", " ") for label in df["display_name"]]

def get_wikidata_qid(name):
    """Return the Wikidata Q-ID of an item whose English label equals ``name``.

    Sends a single SPARQL query to the public Wikidata Query Service and uses
    the first binding (the query has ``LIMIT 1`` and no ordering), so when
    several items share the label the pick is whichever the endpoint lists
    first.

    Args:
        name: Exact English label to search for, e.g. ``"Mute Swan"``.

    Returns:
        The Q-ID as a string (last path segment of the item URI, such as
        ``"Q25402"``). When the request fails, the server answers with an
        HTTP error status, the payload is malformed, or nothing matches, a
        placeholder made of ``"RND"`` plus six random digits is returned
        instead. Placeholders are random and therefore not guaranteed to be
        unique.
    """
    # Escape the characters that would otherwise terminate or break the
    # double-quoted SPARQL string literal the name is embedded in.
    escaped = (
        str(name)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    query = f'''
    SELECT ?item WHERE {{
      ?item rdfs:label "{escaped}"@en.
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        # Wikimedia asks automated clients to identify themselves.
        # NOTE(review): add contact details (URL or e-mail) for real runs.
        "User-Agent": "xcl-map-qid-lookup/1.0 (notebook script)",
    }
    try:
        response = requests.get(url, params={'query': query}, headers=headers, timeout=10)
        # Turn 4xx/5xx answers (e.g. 429 rate limiting) into an explicit error
        # instead of trying to parse an error page as a result set.
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        if bindings:
            # The item value is a URI; the Q-ID is its final path segment.
            return bindings[0]["item"]["value"].split("/")[-1]
    except Exception as e:
        # Deliberately best-effort: report the problem and fall through to
        # the placeholder so one bad lookup does not abort a long batch.
        print(f"Error fetching for '{name}':", e)

    # Fallback: generate a random ID
    return "RND" + ''.join(random.choices(string.digits, k=6))

# Fetch Q-IDs for all names
print("Fetching Q-IDs...")
qid_map = {}
for name in names:
    # Each distinct name needs only one lookup: skip the HTTP round trip and
    # the courtesy delay when the name has already been resolved.
    if name in qid_map:
        continue
    qid = get_wikidata_qid(name)
    qid_map[name] = qid
    print(f"{name} → {qid}")
    time.sleep(1)  # avoid rate limits

# get_wikidata_qid() answers with an "RND…" placeholder when a lookup fails or
# finds nothing; report how many there are so they do not go unnoticed.
placeholder_count = sum(1 for found in qid_map.values() if found.startswith("RND"))
print(f"{placeholder_count} of {len(qid_map)} names got a random placeholder instead of a Wikidata Q-ID")

# Map back to DataFrame
df["wikidata_qid"] = df["display_name"].apply(lambda x: qid_map[x.replace("_", " ")])



Fetching Q-IDs...
Abert's Towhee → Q3178867
Mountain Chickadee → Q973320
Northern Pintail → Q25450
Northern Parula → Q27075910
Northern Mockingbird → Q829683
Northern Lapwing → Q25392
Northern Hawk Owl → RND219479
Northern Flicker → Q16819
Northern Cardinal → Q726389
Northern Bobwhite → Q142651
New Zealand Bellbird → Q394708
Nashville Warbler → Q27075894
Mute Swan → Q25402
Mourning Warbler → Q27075897
Mourning Dove → Q11937719
Morelet's Seedeater → Q31874551
Greater Pewee → Q1263607
Montezuma Oropendola → Q934975
Montagu's Harrier → RND439261
Monk Parakeet → Q724987
Mistle Thrush → Q178942
Middle Spotted Woodpecker → Q275591
Mexican Jay → Q942604
Mexican Duck → RND412182
Merlin → Q76148
Melodious Blackbird → Q1588405
Meadow Pipit → Q26956
Masked Tityra → Q1304903
Marsh Wren → Q1379530
Marsh Tit → Q207838
Northern Shoveler → Q28106731
Northern Waterthrush → Q27075884
Northern Wheatear → Q26420
Nuttall's Woodpecker → RND060692
Prairie Warbler → Q27075937
Plain Prinia → Q73928
Plain Chach

In [7]:
# Display the DataFrame to inspect its columns after the Q-ID lookup.
df

Unnamed: 0,index,mid,display_name,wikidata_qid
0,1,Q3178867,Abert's_Towhee,Q3178867
1,2,Q973320,Mountain_Chickadee,Q973320
2,3,Q25450,Northern_Pintail,Q25450
3,4,Q27075910,Northern_Parula,Q27075910
4,5,Q829683,Northern_Mockingbird,Q829683
...,...,...,...,...
540,541,Q2224815,Juniper_Titmouse,Q2224815
541,542,Q776016,Golden_Tanager,Q776016
542,543,Q912057,Green_Ibis,Q912057
543,544,Q5419907,Variable_Oriole,Q5419907


In [6]:
# Set the `mid` column to the values collected in `wikidata_qid`.
# NOTE(review): this also copies any "RND…" placeholder IDs that were produced
# when a lookup failed or found nothing — confirm that is intended.
df['mid'] = df['wikidata_qid']

In [9]:
# `mid` now holds the same values, so remove the helper column in place.
df.drop(columns=['wikidata_qid'], inplace=True)

In [16]:
# Display the DataFrame again to check the current columns and values.
df

Unnamed: 0,index,mid,display_name
0,1,Q3178867,"""Abert's_Towhee"""
1,2,Q973320,"""Mountain_Chickadee"""
2,3,Q25450,"""Northern_Pintail"""
3,4,Q27075910,"""Northern_Parula"""
4,5,Q829683,"""Northern_Mockingbird"""
...,...,...,...
540,541,Q2224815,"""Juniper_Titmouse"""
541,542,Q776016,"""Golden_Tanager"""
542,543,Q912057,"""Green_Ibis"""
543,544,Q5419907,"""Variable_Oriole"""


In [15]:
# Wrap every display_name in literal double quotes. A value that already
# starts and ends with a quote is left untouched, so running this cell more
# than once does not nest a second pair of quotes around the names.
df['display_name'] = df['display_name'].apply(
    lambda x: x if len(x) >= 2 and x[0] == '"' and x[-1] == '"' else "\"" + x + "\""
)

In [18]:
import csv

# Write the frame without the index column. Quoting is disabled
# (csv.QUOTE_NONE), so the writer adds no quotes of its own and each field is
# emitted as stored in the frame.
df.to_csv(
    "xcl_map_with_qids.csv",
    quoting=csv.QUOTE_NONE,
    encoding='utf-8',
    index=False,
    sep=',',
)