# Catalogue Disambiguation: `search` -> `disambiguate` -> `filter`

Continues from `2020_06_26 searching items by name.ipynb`.

In [1]:
import sys
sys.path.append("..")

from heritageconnector.utils.sparql import get_sparql_results

import re
from tqdm import tqdm
import pandas as pd

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

tqdm.pandas()

  from pandas import Panel


## 1. Load data and ID mapping

In [2]:
id_mapping = pd.read_csv("../GITIGNORE_DATA/itemnames_links.csv")

# get rid of IDs for which we don't have a qcode
id_mapping = id_mapping[id_mapping["wikidata_id"].astype(str).str.startswith("Q")]
names_mapped = id_mapping['item_name'].tolist()

id_mapping.head(2)

Unnamed: 0,item_name,count,match_name,match_id,getty_aat_id,wikidata_id
0,photograph,11087.0,photographs,aat/300046300,300046300.0,Q125191
1,poster,10003.0,posters,aat/300027221,300027221.0,Q429785


In [3]:
df = pd.read_csv("../GITIGNORE_DATA/smg-datasets-private/mimsy-catalogue-export.csv")

# clean up title column so it's searchable in the SPARQL query
df["TITLE"] = df["TITLE"].str.replace("\"", "").replace("\n", "")

# make list from ITEM_NAME column
df["ITEM_NAME_list"] = df['ITEM_NAME'].fillna("").astype(str).apply(lambda i: [x.strip().lower() for x in i.split(';')])

# filter df to only items which have properties we have matched
df['name_mapped'] = df['ITEM_NAME_list'].progress_apply(lambda x: any([item in names_mapped for item in x]) )
dfm = df[df['name_mapped'] == True]

# percentage, number of items for which we have a property that can be mapped:
print(len(dfm) / len(df) * 100, len(dfm))

  interactivity=interactivity, compiler=compiler, result=result)
100%|██████████| 282259/282259 [00:06<00:00, 44662.71it/s]


33.069627540663014 93342


## 2. Get qcodes from item names using mapping table

In [4]:
def qcode_from_name(name):
    try:
        return id_mapping.loc[id_mapping["item_name"] == name, 'wikidata_id'].values[0]
    except:
        return None

dfm['ITEM_NAME_qcodes'] = dfm['ITEM_NAME_list'].apply(lambda l: [qcode_from_name(i) for i in l if qcode_from_name(i) not in (None, )])

dfm['ITEM_NAME_qcodes'].apply(len).value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


1    85357
2     7466
3      476
4       40
5        2
7        1
Name: ITEM_NAME_qcodes, dtype: int64

## 3. `search`: fine-tuning SPARQL query
Item properties used for query:
- name (text search)
- item_name (Wikidata qcode search)

### 3.1 Filter stopwords
Any item names that are 'generic'.

In [5]:
itemnames_unique = list(set(df['ITEM_NAME_list'].sum()))
itemnames_unique = [item for item in itemnames_unique if item not in [""]]

stopwords = itemnames_unique
stopwords += ['statue of buddha', 'pin', 'pencil drawing', 'hindu astrolabe', 'glass bottle'] # custom stopwords

title_count = dfm['TITLE'].astype(str).str.lower().value_counts()
unique_titles = title_count.index.tolist()
print("short/long...")
stopwords += [title for title in unique_titles if len(title)<= 3 or len(title)>=50] # short/long strings
print("frequent...")
stopwords += [title for title in unique_titles if title_count[title] > 1] # frequent strings

print('performing filtering...')
len_before = len(dfm)
dfm = dfm[~dfm['TITLE'].astype(str).str.lower().isin(stopwords)]

len(dfm)/len_before, len(dfm)

short/long...
frequent...
performing filtering...


(0.3803539671316235, 35503)

### 3.2 Run search

In [6]:
endpoint_url = "https://query.wikidata.org/sparql"
    
def run_query(mkey, title, itemnames):
    # NOTE: the LIMIT value at the end also tells the query how far up to go in the 'class of' chain
    map_ids = lambda ids: ",".join([f"wd:{i}" for i in ids])
    
    query = f"""
    SELECT DISTINCT ?item ?itemLabel ?type ?typeLabel WHERE {{
      SERVICE wikibase:mwapi {{
          bd:serviceParam wikibase:api "EntitySearch" .
          bd:serviceParam wikibase:endpoint "www.wikidata.org" .
          bd:serviceParam mwapi:search "{title}" .
          bd:serviceParam mwapi:language "en" .
          ?item wikibase:apiOutputItem mwapi:item .
          ?num wikibase:apiOrdinal true .
      }}
      ?item (wdt:P279|wdt:P31) ?type .
      ?item (wdt:P31/wdt:P279*) ?classTree .
      FILTER ( ?classTree in ( {map_ids(itemnames)} ) )
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en"}}
    }} ORDER BY ASC(?num) LIMIT 20
    """
    
    res = get_sparql_results(endpoint_url, query)['results']['bindings']
    res_df = pd.json_normalize(res)
    if len(res_df) > 0:
        res_df["MKEY"] = mkey
        res_df["TITLE"] = title
        res_df = res_df[['MKEY', 'TITLE', 'item.value', 'itemLabel.value', 'type.value', 'typeLabel.value']]
        res_df = res_df.drop_duplicates(subset=["MKEY", "TITLE", "item.value", "itemLabel.value"])
    return res_df


In [8]:
query_res = pd.DataFrame()

for idx, row in tqdm(dfm.iterrows(), total=dfm.shape[0]):
    mkey = row["MKEY"]
    title = row["TITLE"]
    itemnames = row["ITEM_NAME_qcodes"]
    
    try:
        tempdf = run_query(mkey, title, itemnames)
        query_res = pd.concat([query_res, tempdf])
    
        if len(tempdf) > 0: 
            print(f"{mkey}, {title}: {len(tempdf)}")
    except:
        print(f"{mkey}, {title} FAILED")

  0%|          | 173/35503 [01:01<3:53:06,  2.53it/s]

1347, Wells Cathedral clock: 1


  1%|▏         | 450/35503 [02:53<4:01:50,  2.42it/s]

9367, Bleach Works at Llewenni: 1


  2%|▏         | 689/35503 [04:24<6:21:00,  1.52it/s] 

12627, Paracelsus: 1


  7%|▋         | 2440/35503 [14:38<4:40:51,  1.96it/s]

27748, Leopard: 2


  7%|▋         | 2500/35503 [15:01<3:57:30,  2.32it/s]

26715, Sans Pareil: 1


 10%|▉         | 3503/35503 [20:57<3:01:12,  2.94it/s] 

56116, star and sundial, Dutch, 1681: 1


 10%|▉         | 3512/35503 [21:00<2:47:13,  3.19it/s]

56133, Islamic astrolabe, 1601-1699: 1


 10%|▉         | 3538/35503 [21:08<2:58:27,  2.99it/s]

56287, European astrolabe, 1320-1330: 1


 10%|▉         | 3550/35503 [21:12<3:09:10,  2.82it/s]

56286, European astrolabe, 1548: 1


 10%|█         | 3552/35503 [21:13<3:12:59,  2.76it/s]

56288, Planispheric astrolabe: 8


 10%|█         | 3553/35503 [21:14<3:36:48,  2.46it/s]

56289, European astrolabe, 1295-1305: 1


 10%|█         | 3554/35503 [21:14<3:37:32,  2.45it/s]

56290, Ptolemaic armillary sphere: 1


 10%|█         | 3556/35503 [21:15<3:29:00,  2.55it/s]

56292, European astrolabe, 1425-1435: 1


 10%|█         | 3577/35503 [21:22<3:07:11,  2.84it/s]

56145, Islamic astrolabe, 1701-1800: 1


 10%|█         | 3662/35503 [21:46<2:41:49,  3.28it/s]

54687, Daniell hygrometer: 1


 10%|█         | 3678/35503 [21:52<3:42:21,  2.39it/s]

57143, Hindu astrolabe, 1800-1899: 1


 10%|█         | 3712/35503 [22:05<3:23:30,  2.60it/s]

56975, Islamic astrolabe, 1145-1155: 1


 10%|█         | 3713/35503 [22:05<3:14:27,  2.72it/s]

56976, Islamic astrolabe, 1605-1606: 1


 10%|█         | 3714/35503 [22:06<3:18:32,  2.67it/s]

56980, European astrolabe,1650: 1


 10%|█         | 3721/35503 [22:09<3:33:41,  2.48it/s]

56821, Islamic astrolabe, 1645-1655: 1


 10%|█         | 3722/35503 [22:09<3:26:45,  2.56it/s]

56822, European astrolabe, 1495-1505: 1


 10%|█         | 3727/35503 [22:10<3:13:43,  2.73it/s]

57119, Hindu astrolabe commissioned at Jaipur: 1


 11%|█         | 3745/35503 [22:16<3:12:22,  2.75it/s]

57106, Islamic astrolabe, 1849: 1


 11%|█         | 3779/35503 [22:28<2:56:06,  3.00it/s]

56819, European astrolabe, 1570: 1


 12%|█▏        | 4205/35503 [25:08<5:36:25,  1.55it/s]

64758, The Menai Bridge: 20


 12%|█▏        | 4248/35503 [25:28<3:55:36,  2.21it/s]

65152, Stevedores: 2


 12%|█▏        | 4294/35503 [25:47<2:54:06,  2.99it/s]

65167, Charles Babbage Esqr: 1


 12%|█▏        | 4399/35503 [26:33<3:51:33,  2.24it/s]

65389, An iron forge: 1


 12%|█▏        | 4418/35503 [26:41<4:03:03,  2.13it/s]

65337, Dick: 1


 12%|█▏        | 4425/35503 [26:43<3:42:17,  2.33it/s]

65494, John Flamsteed: 1


 13%|█▎        | 4476/35503 [27:06<6:08:41,  1.40it/s]

65500, James Ferguson: 1


 13%|█▎        | 4545/35503 [27:31<3:01:05,  2.85it/s]

64637, Parisian Sketches: 1


 13%|█▎        | 4580/35503 [27:44<4:29:44,  1.91it/s]

64756, Pont y Cyslte Aqueduct: 1


 13%|█▎        | 4591/35503 [27:49<4:53:01,  1.76it/s]

64708, The Creation: 4


 13%|█▎        | 4597/35503 [27:52<4:17:16,  2.00it/s]

64477, The Forge: 5


 13%|█▎        | 4615/35503 [28:00<3:29:22,  2.46it/s]

64819, Davies Gilbert: 1


 13%|█▎        | 4647/35503 [28:14<3:44:35,  2.29it/s]

67611, Life School Royal Academy: 1


 13%|█▎        | 4694/35503 [28:32<3:36:28,  2.37it/s]

67791, Brougham: 1


 13%|█▎        | 4711/35503 [28:38<4:09:43,  2.06it/s]

67432, Jacob Perkins Esq: 1


 13%|█▎        | 4768/35503 [29:01<4:03:21,  2.10it/s]

67323, Ironing: 1


 14%|█▎        | 4802/35503 [29:13<2:50:43,  3.00it/s]

65891, Port Madoc, Carnarvonshire: 1


 14%|█▍        | 4968/35503 [30:14<4:30:10,  1.88it/s]

66903, Erasmus Darwin: 1


 14%|█▍        | 4969/35503 [30:14<4:41:52,  1.81it/s]

66904, The Farrier's Shop: 1


 14%|█▍        | 4977/35503 [30:18<4:45:56,  1.78it/s]

66680, Sir Hugh Myddelton: 1


 14%|█▍        | 5055/35503 [30:57<5:55:02,  1.43it/s]

66746, The Penrhyn Slate Quarries: 1


 14%|█▍        | 5058/35503 [30:58<4:30:37,  1.88it/s]

66285, Reflections: 7


 14%|█▍        | 5089/35503 [31:10<2:57:31,  2.86it/s]

66832, Mr. Peter Nicholson: 1


 15%|█▍        | 5205/35503 [31:52<5:02:01,  1.67it/s]

67589, Costumes: 18


 15%|█▍        | 5216/35503 [31:59<4:21:03,  1.93it/s]

67802, Princess car: 1


 15%|█▍        | 5229/35503 [32:05<3:53:15,  2.16it/s]

67493, The Balloon: 2


 15%|█▍        | 5252/35503 [32:16<4:24:25,  1.91it/s]

67290, Grande Semaine d'Aviation: 2


 15%|█▍        | 5300/35503 [32:38<3:34:53,  2.34it/s]

72547, Statue of St. Catherine: 2


 17%|█▋        | 6036/35503 [36:47<4:37:37,  1.77it/s] 

82141, Terracotta statue: 3


 17%|█▋        | 6067/35503 [36:59<4:45:58,  1.72it/s]

81891, Statue of Artemis: 1


 17%|█▋        | 6109/35503 [37:11<3:40:40,  2.22it/s]

85442, Statue of St Mary Magdalene: 1


 18%|█▊        | 6333/35503 [38:18<2:30:24,  3.23it/s]

85284, Statue of Saint Anne: 5


 18%|█▊        | 6516/35503 [39:17<2:20:23,  3.44it/s]

82995, Eye of Horus amulet: 7


 26%|██▌       | 9168/35503 [54:57<2:59:15,  2.45it/s] 

122624, Figure of Durga: 2


 27%|██▋       | 9429/35503 [56:47<3:57:15,  1.83it/s]

125120, Statue of god: 2


 27%|██▋       | 9465/35503 [56:58<1:59:13,  3.64it/s]

123586, Figure of Subhadra: 1


 27%|██▋       | 9560/35503 [57:39<2:43:32,  2.64it/s]

124294, Figure of Ganesha: 4


 28%|██▊       | 9850/35503 [59:21<1:57:09,  3.65it/s]

125156, Figure of man: 2


 28%|██▊       | 9852/35503 [59:22<2:35:38,  2.75it/s]

125158, Figure of a Bodhisattva: 8


 30%|███       | 10679/35503 [1:04:20<2:49:30,  2.44it/s]

155720, Planispheric astrolabe, 1572: 1


 31%|███       | 11018/35503 [1:06:31<2:38:32,  2.57it/s] 

158445, Drawing of a church: 1


 34%|███▍      | 12233/35503 [1:13:27<2:39:04,  2.44it/s]

203459, Sand Dunes: 3


 35%|███▌      | 12453/35503 [1:14:45<3:34:22,  1.79it/s]

204433, Fashion: 2


 36%|███▌      | 12728/35503 [1:16:28<2:47:00,  2.27it/s]

203769, The Morning Chronicle: 1


 37%|███▋      | 13240/35503 [1:19:26<2:27:13,  2.52it/s]

227026, Southend: 1


 38%|███▊      | 13477/35503 [1:20:45<2:09:59,  2.82it/s]

227309, Race Meeting: 1


 39%|███▊      | 13671/35503 [1:21:57<2:33:17,  2.37it/s]

227789, Corfe Castle: 5


 39%|███▊      | 13699/35503 [1:22:08<4:17:23,  1.41it/s]

227792, Assistance: 3


 39%|███▊      | 13753/35503 [1:22:33<3:36:51,  1.67it/s]

227818, Giant's Causeway: 1


 39%|███▉      | 13843/35503 [1:23:02<1:54:41,  3.15it/s]

227844, Bamburgh Castle: 1


 39%|███▉      | 13944/35503 [1:23:39<2:41:10,  2.23it/s]

227135, Mumbles: 1


 40%|████      | 14295/35503 [1:25:48<1:42:47,  3.44it/s]

229010, American Line: 2


 40%|████      | 14330/35503 [1:26:00<2:40:13,  2.20it/s]

226895, The Queen: 3


 41%|████▏     | 14698/35503 [1:28:15<2:26:40,  2.36it/s]

226984, Painting on silk: 1


 42%|████▏     | 14911/35503 [1:29:35<3:57:30,  1.45it/s]

226614, The Tower of London: 1


 42%|████▏     | 14951/35503 [1:29:50<2:50:36,  2.01it/s]

227550, Windermere: 2


 42%|████▏     | 14978/35503 [1:30:05<10:42:11,  1.88s/it]

217541, Armchair, Stockton & Darlington Railway FAILED


 42%|████▏     | 14997/35503 [1:30:18<11:05:10,  1.95s/it]

215695, Sachet of `Spotkleen' stain-removing cloth FAILED


 44%|████▍     | 15613/35503 [1:33:59<2:20:26,  2.36it/s] 

227121, Waterloo: 1


 44%|████▍     | 15657/35503 [1:34:15<2:37:58,  2.09it/s]

227591, Hirondelle: 2


 44%|████▍     | 15701/35503 [1:34:30<3:03:49,  1.80it/s]

227357, Looe: 1


 45%|████▌     | 15980/35503 [1:36:00<1:31:14,  3.57it/s]

229700, Chemin de Fer du Nord: 1


 45%|████▌     | 16073/35503 [1:36:34<2:10:13,  2.49it/s]

409851, L'Astronomie: 1


 45%|████▌     | 16138/35503 [1:36:58<4:16:03,  1.26it/s]

412295, Iron Horse: 1


 46%|████▌     | 16286/35503 [1:37:50<2:49:32,  1.89it/s]

421099, High Level Bridge: 2


 46%|████▌     | 16405/35503 [1:38:35<2:33:15,  2.08it/s]

420073, The Great Bear: 1


 46%|████▋     | 16452/35503 [1:38:51<2:31:35,  2.09it/s]

421145, Welshpool: 1


 46%|████▋     | 16464/35503 [1:38:55<2:08:19,  2.47it/s]

420086, London Bridge: 3


 46%|████▋     | 16467/35503 [1:38:56<2:06:16,  2.51it/s]

420091, The Menai Bridge, Bangor: 2


 47%|████▋     | 16509/35503 [1:39:10<1:46:34,  2.97it/s]

421043, The Evening Star: 1


 47%|████▋     | 16517/35503 [1:39:13<2:43:03,  1.94it/s]

412973, Britannia Bridge: 4


 47%|████▋     | 16646/35503 [1:39:58<2:26:34,  2.14it/s]

421395, The Castle: 1


 47%|████▋     | 16647/35503 [1:39:59<2:34:16,  2.04it/s]

421396, The Menai Suspension Bridge: 3


 49%|████▊     | 17266/35503 [1:43:46<2:06:46,  2.40it/s]

420787, Fury: 1


 49%|████▉     | 17319/35503 [1:44:06<1:49:18,  2.77it/s]

421270, Great Tubular Bridge across the Menai Strait: 1


 49%|████▉     | 17473/35503 [1:44:57<1:38:29,  3.05it/s]

419731, Railway viaduct: 1


 49%|████▉     | 17506/35503 [1:45:09<1:51:28,  2.69it/s]

419767, Grosmont: 8


 49%|████▉     | 17565/35503 [1:45:30<1:57:19,  2.55it/s]

420097, Winchester Cathedral: 2


 50%|████▉     | 17744/35503 [1:46:37<1:26:13,  3.43it/s]

447699, ITT CK702\1 26 colour television receiver, 1972 FAILED


 50%|█████     | 17810/35503 [1:46:59<2:06:39,  2.33it/s]

462444, Her Majesty the Queen: 3


 50%|█████     | 17811/35503 [1:46:59<1:57:26,  2.51it/s]

462445, Her Most Gracious Majesty Queen Victoria: 1


 50%|█████     | 17918/35503 [1:47:37<1:21:21,  3.60it/s]

447962, Bacon's map: 1


 51%|█████     | 17988/35503 [1:48:00<2:17:27,  2.12it/s]

440431, Farewell: 6


 52%|█████▏    | 18364/35503 [1:50:18<1:59:04,  2.40it/s]

485540, water colour: 1


 52%|█████▏    | 18429/35503 [1:50:43<1:30:30,  3.14it/s]

483430, John Bunyan: 2


 53%|█████▎    | 18731/35503 [1:52:25<1:43:38,  2.70it/s]

523212, Kite flying: 1


 53%|█████▎    | 18785/35503 [1:52:43<1:49:05,  2.55it/s]

523071, HP22: 1


 53%|█████▎    | 18833/35503 [1:52:59<1:24:21,  3.29it/s]

528956, Daily Express: 5


 53%|█████▎    | 18844/35503 [1:53:02<1:29:08,  3.11it/s]

520296, grid compass: 1


 53%|█████▎    | 18888/35503 [1:53:18<1:59:26,  2.32it/s]

527143, Country Scenes: 2


 53%|█████▎    | 18908/35503 [1:53:26<2:19:15,  1.99it/s]

523947, HP 55: 1


 53%|█████▎    | 18909/35503 [1:53:26<2:11:21,  2.11it/s]

527245, Jacob Perkins Esqr: 1


 53%|█████▎    | 18912/35503 [1:53:28<2:07:00,  2.18it/s]

518962, Shroud: 1


 53%|█████▎    | 18919/35503 [1:53:30<1:30:23,  3.06it/s]

523973, The Transept: 1


 54%|█████▎    | 18998/35503 [1:53:58<2:15:10,  2.03it/s]

530999, Ulysses: 1


 54%|█████▎    | 19002/35503 [1:53:59<1:55:22,  2.38it/s]

532478, The Builder: 1


 54%|█████▍    | 19159/35503 [1:54:55<1:58:21,  2.30it/s]

525314, Clifton Suspension Bridge: 1


 55%|█████▍    | 19402/35503 [1:56:19<2:07:50,  2.10it/s]

535631, David Livingstone: 3


 55%|█████▍    | 19443/35503 [1:56:32<1:56:27,  2.30it/s]

537281, Celebrate the Century: 1


 55%|█████▍    | 19480/35503 [1:56:45<2:11:01,  2.04it/s]

537295, SPARCstation: 4


 55%|█████▌    | 19531/35503 [1:57:04<1:58:26,  2.25it/s]

8001811, Sir Humphrey Davy: 2


 56%|█████▌    | 19767/35503 [1:58:33<2:28:28,  1.77it/s]

8012314, A Barber: 4


 56%|█████▌    | 19768/35503 [1:58:33<2:43:12,  1.61it/s]

8012315, A Cooper: 1


 56%|█████▌    | 19782/35503 [1:58:39<2:00:54,  2.17it/s]

8003675, Calico printing: 2


 56%|█████▌    | 19846/35503 [1:59:02<1:37:19,  2.68it/s]

8003806, Wind: 3


 56%|█████▌    | 19899/35503 [1:59:22<2:07:35,  2.04it/s]

8003879, Albert Medal: 1


 56%|█████▌    | 19963/35503 [1:59:44<1:40:54,  2.57it/s]

531891, Valley: 1


 57%|█████▋    | 20107/35503 [2:00:47<1:51:33,  2.30it/s]

532030, Midnight: 2


 57%|█████▋    | 20355/35503 [2:02:35<1:52:05,  2.25it/s]

8012884, Franklin Medal: 1


 58%|█████▊    | 20603/35503 [2:04:19<2:13:11,  1.86it/s]

8022533, Toledo, Spain: 1


 59%|█████▊    | 20822/35503 [2:05:43<2:33:20,  1.60it/s]

8018713, George Henry Lewes: 1


 62%|██████▏   | 22103/35503 [2:13:55<1:30:00,  2.48it/s]

8058285, Gladstone: 1


 63%|██████▎   | 22215/35503 [2:14:32<1:08:32,  3.23it/s]

8057380, St. Michael's Mount: 1


 63%|██████▎   | 22309/35503 [2:14:58<1:13:34,  2.99it/s]

8058423, seaside village: 1


 66%|██████▌   | 23337/35503 [2:20:17<1:36:06,  2.11it/s]

8096463, Signals: 3


 66%|██████▌   | 23348/35503 [2:20:22<2:06:38,  1.60it/s]

8096499, Broad Street: 1


 66%|██████▌   | 23351/35503 [2:20:25<2:28:17,  1.37it/s]

8096502, Crystal Palace: 1


 67%|██████▋   | 23660/35503 [2:22:02<1:04:25,  3.06it/s]

8101292, Map of London: 1


 68%|██████▊   | 23968/35503 [2:23:46<1:30:47,  2.12it/s]

8104644, Kerry: 2


 68%|██████▊   | 23971/35503 [2:23:48<2:12:36,  1.45it/s]

8104648, Grace: 1


 68%|██████▊   | 23972/35503 [2:23:49<2:13:53,  1.44it/s]

8104649, Kelly: 1


 68%|██████▊   | 24034/35503 [2:24:13<1:13:03,  2.62it/s]

66456, The Company of Undertakers: 2


 68%|██████▊   | 24172/35503 [2:24:58<1:17:06,  2.45it/s]

226771, Ludlow: 1


 69%|██████▉   | 24482/35503 [2:26:38<58:01,  3.17it/s]  

64745, Aerostation: 1


 69%|██████▉   | 24515/35503 [2:26:47<1:20:58,  2.26it/s]

8094447, Contemplation: 1


 69%|██████▉   | 24590/35503 [2:27:11<1:17:29,  2.35it/s]

8002075, Destroyers: 1


 71%|███████   | 25082/35503 [2:29:43<1:29:23,  1.94it/s]

65495, James Gregory: 1


 74%|███████▎  | 26118/35503 [2:35:04<49:07,  3.18it/s]  

8223517, Usherette: 2


 74%|███████▍  | 26217/35503 [2:35:35<59:53,  2.58it/s]  

8223593, Children playing: 5


 75%|███████▍  | 26491/35503 [2:37:01<42:44,  3.51it/s]  

8228268, Gypsy camp: 4


 75%|███████▍  | 26500/35503 [2:37:04<49:57,  3.00it/s]

8228866, Prince Kung: 1


 75%|███████▍  | 26520/35503 [2:37:11<45:35,  3.28it/s]  

8228278, Two young girls: 1


 75%|███████▍  | 26590/35503 [2:37:34<47:00,  3.16it/s]  

8228884, The Begum Kotie: 1


 75%|███████▍  | 26599/35503 [2:37:37<59:58,  2.47it/s]

8228323, Notre Dame: 1


 75%|███████▍  | 26602/35503 [2:37:38<1:07:47,  2.19it/s]

8228329, Fruit and Flowers: 2


 75%|███████▌  | 26633/35503 [2:37:48<53:45,  2.75it/s]  

8228754, Fashion photograph: 1


 75%|███████▌  | 26656/35503 [2:37:54<42:59,  3.43it/s]

8228759, Woman with flowers: 3


 75%|███████▌  | 26658/35503 [2:37:55<1:05:22,  2.25it/s]

8228762, Strawberries: 3


 75%|███████▌  | 26730/35503 [2:38:21<54:02,  2.71it/s]  

8228798, Building site: 1


 75%|███████▌  | 26762/35503 [2:38:34<44:50,  3.25it/s]  

8228805, Trafalgar Square: 1


 75%|███████▌  | 26789/35503 [2:38:44<1:41:06,  1.44it/s]

8228210, Victor Hugo: 2


 76%|███████▌  | 26813/35503 [2:38:52<53:27,  2.71it/s]  

8228215, Gustave Doré: 1


 76%|███████▌  | 26819/35503 [2:38:54<48:35,  2.98it/s]  

8228222, William Powell Frith: 1


 76%|███████▌  | 26830/35503 [2:38:58<53:22,  2.71it/s]

8228999, Tulips: 1


 76%|███████▌  | 26832/35503 [2:38:58<55:28,  2.61it/s]

8229002, White house: 1


 76%|███████▌  | 26833/35503 [2:38:59<56:26,  2.56it/s]

8229004, Man sitting on steps: 1


 76%|███████▌  | 26888/35503 [2:39:20<1:06:51,  2.15it/s]

8229023, The Miners' Bridge: 1


 76%|███████▌  | 26901/35503 [2:39:26<1:18:44,  1.82it/s]

8229036, Woman in costume: 1


 76%|███████▌  | 26946/35503 [2:39:47<1:29:43,  1.59it/s]

8225464, Beatrice: 3


 76%|███████▌  | 26965/35503 [2:39:54<58:57,  2.41it/s]  

8237676, Four Men: 2


 76%|███████▋  | 27147/35503 [2:40:57<48:22,  2.88it/s]  

8226503, A Picnic: 2


 77%|███████▋  | 27212/35503 [2:41:19<1:16:47,  1.80it/s]

8233524, Hyde Park, London: 1


 77%|███████▋  | 27312/35503 [2:41:51<1:04:18,  2.12it/s]

8226566, Unknown woman: 3


 77%|███████▋  | 27315/35503 [2:41:53<1:09:59,  1.95it/s]

8226570, The Bungalow: 1


 77%|███████▋  | 27398/35503 [2:42:21<52:38,  2.57it/s]  

8230201, Houses of Parliament: 1


 77%|███████▋  | 27410/35503 [2:42:25<47:04,  2.87it/s]

8225945, Kitchen: 1


 78%|███████▊  | 27558/35503 [2:43:14<50:38,  2.61it/s]  

8227087, Old Vennel, off High Street: 1


 78%|███████▊  | 27574/35503 [2:43:19<34:21,  3.85it/s]  

8226107, Three women in hats: 1


 78%|███████▊  | 27773/35503 [2:44:29<1:33:42,  1.37it/s]

8237840, Nude Study: 1


 78%|███████▊  | 27776/35503 [2:44:30<57:20,  2.25it/s]  

8237855, The Onion Field: 1


 78%|███████▊  | 27797/35503 [2:44:37<47:40,  2.69it/s]  

8233193, Migrant Mother, Nipomo, California: 1


 78%|███████▊  | 27807/35503 [2:44:42<1:05:21,  1.96it/s]

8237866, The Steerage: 8


 79%|███████▉  | 27971/35503 [2:45:35<47:13,  2.66it/s]  

8347157, Roger Fenton: 1


 79%|███████▉  | 28027/35503 [2:45:53<39:26,  3.16it/s]

8338419, Loyalist Militiaman at the Moment of Death: 1


 79%|███████▉  | 28112/35503 [2:46:18<1:14:52,  1.65it/s]

8341385, Clouds: 2


 79%|███████▉  | 28141/35503 [2:46:29<1:22:18,  1.49it/s]

8341101, Frank Sinatra: 1


 80%|███████▉  | 28228/35503 [2:46:57<46:38,  2.60it/s]  

8343236, Fern: 3


 80%|████████  | 28503/35503 [2:48:29<1:29:56,  1.30it/s]

8360984, American Falls: 1


 81%|████████  | 28603/35503 [2:49:02<36:43,  3.13it/s]  

8364237, Barry Island: 1


 81%|████████  | 28768/35503 [2:49:55<51:52,  2.16it/s]  

8401541, Cocksucker: 1


 81%|████████  | 28787/35503 [2:50:05<1:07:56,  1.65it/s]

8401560, England: 1


 81%|████████  | 28829/35503 [2:50:20<1:16:17,  1.46it/s]

8401593, Black Hat: 1


 81%|████████  | 28845/35503 [2:50:27<42:18,  2.62it/s]  

8402732, J.B. Dancer

Micrograph View FAILED


 82%|████████▏ | 28995/35503 [2:51:17<28:33,  3.80it/s]  

8404961, Lancashire Boiler: 1


 82%|████████▏ | 29182/35503 [2:52:14<30:12,  3.49it/s]

8406022, Polaroid Land Camera: 1


 82%|████████▏ | 29288/35503 [2:53:00<5:59:55,  3.47s/it]

8418037, Lump hammer: 1


 85%|████████▍ | 30108/35503 [2:57:26<27:38,  3.25it/s]  

8411993, British War Medal: 1


 85%|████████▌ | 30182/35503 [2:57:48<17:16,  5.13it/s]

 Wove FAILEDlow & Jones Limited


 85%|████████▌ | 30263/35503 [2:58:12<28:54,  3.02it/s]

8432515, Just Arrived from Kansas: 1


 86%|████████▌ | 30389/35503 [2:58:56<27:34,  3.09it/s]

8432540, Newsboy: 1


 86%|████████▌ | 30545/35503 [3:00:01<32:21,  2.55it/s]  

8432404, Cecil Beaton: 1


 86%|████████▌ | 30561/35503 [3:00:08<37:30,  2.20it/s]

8426108, Textile mill: 1


 86%|████████▋ | 30633/35503 [3:00:43<40:43,  1.99it/s]  

8426128, Painted sign: 1


 86%|████████▋ | 30681/35503 [3:01:01<38:15,  2.10it/s]

8432435, Children of rehabilitation client: 1


 86%|████████▋ | 30686/35503 [3:01:04<41:32,  1.93it/s]

8432666, Cyclists: 1


 86%|████████▋ | 30701/35503 [3:01:11<54:08,  1.48it/s]

8426834, Cafe interior: 1


 87%|████████▋ | 30744/35503 [3:01:27<25:50,  3.07it/s]

8432443, Street Musicians: 1


 87%|████████▋ | 30795/35503 [3:01:45<23:21,  3.36it/s]

8432450, Drought Refugees: 1


 87%|████████▋ | 30812/35503 [3:01:51<32:40,  2.39it/s]

8426177, Country road: 1


 87%|████████▋ | 30816/35503 [3:01:52<26:29,  2.95it/s]

8426858, Doorway: 2


 87%|████████▋ | 30823/35503 [3:01:54<26:13,  2.97it/s]

8427066, Rag and bone man: 1


 87%|████████▋ | 30993/35503 [3:02:53<25:28,  2.95it/s]

8432499, Factory Interior: 1


 88%|████████▊ | 31119/35503 [3:03:36<24:22,  3.00it/s]  

8431503, Dallas, Texas: 2


 88%|████████▊ | 31121/35503 [3:03:37<23:50,  3.06it/s]

8431507, Austin, Texas: 1


 89%|████████▊ | 31448/35503 [3:05:25<26:42,  2.53it/s]

8424978, Gas works: 1


 89%|████████▊ | 31454/35503 [3:05:27<27:47,  2.43it/s]

8425184, Corner shop: 1


 89%|████████▊ | 31471/35503 [3:05:34<44:34,  1.51it/s]

8424993, Street corner: 3


 90%|████████▉ | 31776/35503 [3:07:19<26:02,  2.39it/s]

8557313, Farmyard: 1


 90%|████████▉ | 31813/35503 [3:07:35<47:30,  1.29it/s]

8557140, Woman in Window: 1


 90%|████████▉ | 31863/35503 [3:07:53<29:30,  2.06it/s]

8466192, In the Trenches: 1


 90%|█████████ | 31971/35503 [3:08:31<26:18,  2.24it/s]

8537735, Aubrey de Vere: 1


 91%|█████████ | 32191/35503 [3:09:41<19:24,  2.84it/s]

8558922, Football Team: 1


 91%|█████████ | 32197/35503 [3:09:43<20:44,  2.66it/s]

8559096, Portrait of Man: 2


 91%|█████████ | 32222/35503 [3:09:53<19:22,  2.82it/s]

8576009, Christmas Party: 1


 91%|█████████ | 32276/35503 [3:10:10<19:50,  2.71it/s]

8566900, Cindy: 1


 91%|█████████ | 32321/35503 [3:10:26<27:35,  1.92it/s]

8558913, Female Nude: 1


 91%|█████████ | 32382/35503 [3:10:48<16:37,  3.13it/s]

8568747, Bryant Park: 1


 91%|█████████▏| 32447/35503 [3:11:09<17:51,  2.85it/s]

8591890, General Election: 1


 91%|█████████▏| 32480/35503 [3:11:20<14:54,  3.38it/s]

8591900, Margaret Thatcher: 1


 92%|█████████▏| 32631/35503 [3:12:07<22:09,  2.16it/s]

8576224, The Harbour: 6


 92%|█████████▏| 32683/35503 [3:12:23<15:21,  3.06it/s]

8591779, Paul Simon: 1


 96%|█████████▌| 34011/35503 [3:19:09<09:31,  2.61it/s]

8646035, Nude: 3


 96%|█████████▌| 34150/35503 [3:19:55<11:42,  1.92it/s]

8647618, Dali Atomicus: 2


 96%|█████████▋| 34233/35503 [3:20:22<07:51,  2.69it/s]

8646328, George Balanchine: 1


 97%|█████████▋| 34414/35503 [3:21:20<05:45,  3.15it/s]

8646058, Horse Show: 11


 97%|█████████▋| 34497/35503 [3:21:48<06:11,  2.71it/s]

8648273, David Lloyd George: 1


 97%|█████████▋| 34580/35503 [3:22:16<05:41,  2.71it/s]

8645820, Gamblers: 2


 98%|█████████▊| 34648/35503 [3:22:39<07:04,  2.02it/s]

8645704, Atlantic City: 10


 98%|█████████▊| 34665/35503 [3:22:46<07:44,  1.80it/s]

8645705, Park Avenue: 3


 98%|█████████▊| 34780/35503 [3:23:21<04:06,  2.93it/s]

8668271, Conwy: 3


 98%|█████████▊| 34799/35503 [3:23:27<05:22,  2.18it/s]

8653325, Folies-Bergère: 1


 98%|█████████▊| 34805/35503 [3:23:28<03:27,  3.37it/s]

8653334, Backstage at the Folies-Bergère: 1


 98%|█████████▊| 34847/35503 [3:23:43<05:14,  2.09it/s]

8654612, Early morning: 1


 98%|█████████▊| 34942/35503 [3:24:19<03:31,  2.65it/s]

8654103, Salton Sea: 3


 99%|█████████▊| 34977/35503 [3:24:32<03:44,  2.34it/s]

8653019, Taj Mahal: 5


 99%|█████████▉| 35070/35503 [3:25:04<03:01,  2.39it/s]

8668653, Shoe shine boy: 1


 99%|█████████▉| 35084/35503 [3:25:08<02:08,  3.27it/s]

8669214, Anti-aircraft gun: 1


 99%|█████████▉| 35090/35503 [3:25:09<02:29,  2.75it/s]

8670014, Village scene: 2


 99%|█████████▉| 35191/35503 [3:25:44<01:59,  2.61it/s]

8653462, Place de la Concorde: 2


 99%|█████████▉| 35197/35503 [3:25:47<02:09,  2.37it/s]

8653467, Soiree: 1


 99%|█████████▉| 35218/35503 [3:25:53<01:36,  2.94it/s]

8654627, Adam and Eve: 1


 99%|█████████▉| 35220/35503 [3:25:54<01:38,  2.86it/s]

8654631, Sunflowers: 2


 99%|█████████▉| 35303/35503 [3:26:17<00:59,  3.34it/s]

8653407, Notre-Dame: 1


 99%|█████████▉| 35316/35503 [3:26:22<01:08,  2.73it/s]

8670122, Mending nets: 1


100%|██████████| 35503/35503 [3:27:18<00:00,  2.85it/s]


In [12]:
query_res.to_csv('../GITIGNORE_DATA/disambiguation/objects_lookup_1.csv', index=False)
query_res.head()

Unnamed: 0,MKEY,TITLE,item.value,itemLabel.value,type.value,typeLabel.value
0,1347,Wells Cathedral clock,http://www.wikidata.org/entity/Q11156104,Wells Cathedral clock,http://www.wikidata.org/entity/Q5275,astronomical clock
0,9367,Bleach Works at Llewenni,http://www.wikidata.org/entity/Q23832039,"Bleach works at Llewenni: as at first intended to be built for the honble Thos Fitzmaurice, Denbighshire",http://www.wikidata.org/entity/Q11060274,print
0,12627,Paracelsus,http://www.wikidata.org/entity/Q55014956,Paracelsus,http://www.wikidata.org/entity/Q11060274,print
0,27748,Leopard,http://www.wikidata.org/entity/Q83553778,Leopard,http://www.wikidata.org/entity/Q3305213,painting
1,27748,Leopard,http://www.wikidata.org/entity/Q19883049,Leopard,http://www.wikidata.org/entity/Q3305213,painting
