<div class="alert alert-success">
Jump to the final section for the generation of gazetteers from prefetched Wikidata queries
</div>

In [126]:
import time
import json
import pickle 

import requests
import pandas as pd

from tqdm.notebook import tqdm
from SPARQLWrapper import SPARQLWrapper, JSON

# Get all subclasses labels

In [198]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Chrome/87.0 (Harrando@eurecom.fr) SPARQLWrapper")

classes = {
    "Organization": "Q43229", #includes companies
    "Name": "Q82799",
    "Artist": "Q483501",
    "Geolocation": "Q2221906",
    "City": "Q515",
    "Capital": "Q5119",
    "Town": "Q3957",
    "Demonym": "Q217438",
    "Product": "Q2424752",
    "Brand": "Q431289",
    "Georegion": "Q82794",
    "Country": "Q6256",
    "Given name": "Q202444",
    "Family name": "Q101352"
}

In [192]:
url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
subclasses = {}

for label, wikidata_code in tqdm(classes.items()):
    try:
        print(label, wikidata_code)
        query = "SELECT distinct ?class ?classLabel " + \
                "WHERE { ?class wdt:P279|wdt:P279/wdt:P279 wd:" + wikidata_code + ". " + \
                "        SERVICE wikibase:label { bd:serviceParam wikibase:language 'en' } }"
        data = requests.get(url, params={'query': query, 'format': 'json'}).json()
        # time.sleep(1)
        subclasses[label] = data
    except Exception as e:
        print(str(e))

In [199]:
subclasses = pickle.load(open('subclasses.pickle', 'rb'))

In [200]:
subclasses.keys()

dict_keys(['Organization', 'Name', 'Artist', 'Geolocation', 'City', 'Capital', 'Town', 'Demonym', 'Product', 'Brand', 'Georegion', 'Country'])

In [202]:
url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'

for label, wikidata_code in tqdm(classes.items()):
    try:
        if label not in ["Family name", "Given name"]:
            continue
        print(label, wikidata_code)
        query = "SELECT distinct ?class ?classLabel " + \
                "WHERE { ?class wdt:P279|wdt:P279/wdt:P279 wd:" + wikidata_code + ". " + \
                "        SERVICE wikibase:label { bd:serviceParam wikibase:language 'en' } }"
        data = requests.get(url, params={'query': query, 'format': 'json'}).json()
        # time.sleep(1)
        subclasses[label] = data
    except Exception as e:
        print(str(e))

HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

Given name Q202444
Family name Q101352



In [113]:
# subclasses = pickle.load(open('subclasses.pickle', 'rb'))

In [204]:
for label in subclasses:
    print(label)
    print(len(subclasses[label]['results']['bindings']), 'subclasses')

Organization
3528 subclasses
Name
308 subclasses
Artist
350 subclasses
Geolocation
1324 subclasses
City
190 subclasses
Capital
15 subclasses
Town
39 subclasses
Demonym
6 subclasses
Product
3838 subclasses
Brand
42 subclasses
Georegion
978 subclasses
Country
51 subclasses
Given name
56 subclasses
Family name
122 subclasses


In [205]:
print(subclasses['Organization']['results']['bindings'][0])

{'class': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q79751'}, 'classLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'oligarchy'}}


In [206]:
subclasses_data = []
counter = 0
for class_label in subclasses:
    for result in subclasses[class_label]['results']['bindings']:
        subclass, subclass_label = result['class']['value'], result['classLabel']['value']
        if subclass_label.split('Q')[-1].isnumeric():
            counter += 1
            continue
        
        subclasses_data.append({ # 'class_uri': subclass,
                                'class_qid': subclass.split('/')[-1],
                                'class_label': class_label,
                                'subclass_label': subclass_label.lower()})

counter

1997

In [211]:
pickle.dump(subclasses, open('subclasses.pickle', 'wb'))
subclasses_df = pd.DataFrame(subclasses_data)
subclasses_df.to_csv('csv/subclasses_df.csv', index=False)

In [212]:
subclasses_df.head()

Unnamed: 0,class_qid,class_label,subclass_label
0,Q79751,Organization,oligarchy
1,Q163740,Organization,nonprofit organization
2,Q193622,Organization,order
3,Q194195,Organization,amusement park
4,Q196776,Organization,certificate authority


In [209]:
pd.DataFrame(unused_classes).head()

Unnamed: 0,0
0,Q228353
1,Q230717
2,Q396010
3,Q832115
4,Q1020406


In [210]:
pd.DataFrame(unused_classes).to_csv('csv/unused_subclasses.csv', index=False)

# Count the number of instances from every class

In [14]:
url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
instances = {}

for label, wikidata_code in classes.items():
    try:
        print(label, wikidata_code)
        query = "SELECT ?entity " + \
                "WHERE { ?entity wdt:P31/wdt:P279* wd:" + wikidata_code + ". " + \
                "        SERVICE wikibase:label { bd:serviceParam wikibase:language 'en' } }"
        data = requests.get(url, params={'query': query, 'format': 'json'}).json()
        instances[label] = data['results']['bindings']
        # print(len(results[label]), 'of results returned.')
    except Exception as e:
        print(str(e))

Organization Q43229
Name Q82799
Artist Q483501
Geolocation Q2221906
City Q515
Capital Q5119
Town Q3957
Demonym Q217438
Product Q2424752
Brand Q431289
Georegion Q82794
Country Q6256



In [15]:
for label in classes:
    print(label)
    print(len(instances[label]), 'results')

Organization
2906668 results
Name
542138 results
Artist
436 results
Geolocation
10584664 results
City
33101 results
Capital
602 results
Town
44858 results
Demonym
620 results
Product
722076 results
Brand
8194 results
Georegion
6164118 results
Country
699 results



# Get the classes for each entity

In [213]:
results_classes.keys()

dict_keys(['Organization', 'Name', 'Artist', 'Geolocation', 'City', 'Capital', 'Town', 'Demonym', 'Product', 'Brand', 'Georegion', 'Country', 'Given name'])

In [215]:
# From https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats
# results_classes = {}
limit = 10000

for label, wikidata_code in tqdm(classes.items()):
    if label != 'Family name':
        continuesubclass2class = {}
for key in subclasses:
    for sc in subclasses[key]:
        subclass2class[sc] = key
pickle.dump(subclass2class, open('subclass2class.pickle', 'wb'))subclass2class = {}
for key in subclasses:
    for sc in subclasses[key]:
        subclass2class[sc] = key
pickle.dump(subclass2class, open('subclass2class.pickle', 'wb'))
    print(label, wikidata_code)
    results_classes[label] = []
    offset = 0
    while True:
        try:
             #  ?entity ?entityLabel ?entityAltLabel ?entityDescription
            query = """
                    SELECT distinct  ?entity (GROUP_CONCAT(?class ; SEPARATOR = ';') as ?classes)
                    WHERE 
                    {{
                         ?entity wdt:P31|wdt:P31/wdt:P279|wdt:P31/wdt:P279/wdt:P279 wd:{};
                                 wdt:P31 ?class.
                    }}
                    GROUP BY ?entity
                    OFFSET {}
                    LIMIT {}
                    """.format(wikidata_code, offset*limit, limit)
            if offset == 0:
                print(query)
            sparql.setQuery(query)
            sparql.setReturnFormat(JSON)
            offset += 1
            if len(sparql.query().convert()['results']['bindings']) > 0:
                result = sparql.query().convert()['results']['bindings']
                results_classes[label].append(result)
                print(f'{len(results_classes[label][-1])} of results returned at offset {offset}')
            else:
                print(f'No more results returned (offset {offset})')
                break
        except Exception as e:
            if str(e).startswith('EndPointInternalError'):
                e = 'Wikidata TimeoutException'
                offset -= 1
                time.sleep(30)
            print(f'Exception at label {label} (offset {offset}) : {e}')
            

HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

Family name Q101352

                    SELECT distinct  ?entity (GROUP_CONCAT(?class ; SEPARATOR = ';') as ?classes)
                    WHERE 
                    {
                         ?entity wdt:P31|wdt:P31/wdt:P279|wdt:P31/wdt:P279/wdt:P279 wd:Q101352;
                                 wdt:P31 ?class.
                    }
                    GROUP BY ?entity
                    OFFSET 0
                    LIMIT 10000
                    
10000 of results returned at offset 1
10000 of results returned at offset 2
10000 of results returned at offset 3
10000 of results returned at offset 4
10000 of results returned at offset 5
10000 of results returned at offset 6
10000 of results returned at offset 7
10000 of results returned at offset 8
10000 of results returned at offset 9
10000 of results returned at offset 10
10000 of results returned at offset 11
10000 of results returned at offset 12
10000 of results returned at offset 13
10000 of results returned at offset 14
10000 of 

In [216]:
print(results_classes['Name'][0][1])

{'entity': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q80443'}, 'classes': {'type': 'literal', 'value': 'http://www.wikidata.org/entity/Q16521;http://www.wikidata.org/entity/Q1040689'}}


In [217]:
entity_classes_data = []

for class_label in results_classes:
    results = [x for l in results_classes[class_label] for x in l]
    for result in results:
        entity_classes_data.append({
            'entity_qid': result['entity']['value'].split('/')[-1],
            'entity_label': class_label,
            'entity_classes': ';'.join([s.split('/')[-1] for s in result['classes']['value'].split(';')]),
        })

In [218]:
# pickle.dump(results_classes, open('results_classes.pickle', 'wb'))

In [219]:
df_entity_classes = pd.DataFrame(entity_classes_data)
df_entity_classes

Unnamed: 0,entity_qid,entity_label,entity_classes
0,Q65968,Organization,Q164950
1,Q68330,Organization,Q11032;Q11032
2,Q68682,Organization,Q7275;Q465613;Q3024240;Q7275;Q465613;Q3024240;...
3,Q69119,Organization,Q7278;Q7278
4,Q70738,Organization,Q7278;Q7278
...,...,...,...
3771928,L2876-S1,Family name,Q101352
3771929,L2873-S1,Family name,Q101352
3771930,L40218-S2,Family name,Q101352
3771931,L10906,Family name,Q12145366


In [220]:
results_classes.keys()

dict_keys(['Organization', 'Name', 'Artist', 'Geolocation', 'City', 'Capital', 'Town', 'Demonym', 'Product', 'Brand', 'Georegion', 'Country', 'Given name', 'Family name'])

In [221]:
df_entity_classes.to_csv('csv/entity_classes.csv', index=False)

# Get the labels for each entity

In [26]:
%%time
# From https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats
labels_results = {}
limit = 20000

for label, wikidata_code in tqdm(classes.items()):
    print(label, wikidata_code)
    labels_results[label] = []
    offset = 0
    while True:
        try:
             #  ?entity ?entityLabel ?entityAltLabel ?entityDescription
            query = """
                    SELECT DISTINCT ?entity ?entityLabel ?entityAltLabel ?entityDescription
                    WHERE
                    {{
                      {{
                        SELECT DISTINCT ?entity WHERE {{
                          ?entity wdt:P31|wdt:P31/wdt:P279|wdt:P31/wdt:P279/wdt:P279 wd:{};
                                  wdt:P31 ?class
                        }}
                        OFFSET {}
                        LIMIT {}
                      }}
                      SERVICE wikibase:label {{ bd:serviceParam wikibase:language 'en' }}
                    }}
                    """.format(wikidata_code, offset*limit, limit)
            if offset == 0:
                print(query)
            sparql.setQuery(query)
            sparql.setReturnFormat(JSON)
            offset += 1
            if len(sparql.query().convert()['results']['bindings']) > 0:
                result = sparql.query().convert()['results']['bindings']
                labels_results[label].append(result)
                print(f'{len(labels_results[label][-1])} of results returned at offset {offset}')
            else:
                print(f'No more results returned (offset {offset})')
                break
        except Exception as e:
            print(f'Exception at label {label} (offset {offset}) : {e}')

In [223]:
limit = 10000

for label, wikidata_code in tqdm(classes.items()):
    if label != 'Family name':
        continue
    print(label, wikidata_code)
    labels_results[label] = []
    offset = 0
    while True:
        try:
             #  ?entity ?entityLabel ?entityAltLabel ?entityDescription
            query = """
                    SELECT DISTINCT ?entity ?entityLabel ?entityAltLabel ?entityDescription
                    WHERE
                    {{
                      {{
                        SELECT DISTINCT ?entity WHERE {{
                          ?entity wdt:P31|wdt:P31/wdt:P279|wdt:P31/wdt:P279/wdt:P279 wd:{};
                                  wdt:P31 ?class
                        }}
                        OFFSET {}
                        LIMIT {}
                      }}
                      SERVICE wikibase:label {{ bd:serviceParam wikibase:language 'en' }}
                    }}
                    """.format(wikidata_code, offset*limit, limit)
            if offset == 0:
                print(query)
            sparql.setQuery(query)
            sparql.setReturnFormat(JSON)
            offset += 1
            if len(sparql.query().convert()['results']['bindings']) > 0:
                result = sparql.query().convert()['results']['bindings']
                labels_results[label].append(result)
                print(f'{len(labels_results[label][-1])} of results returned at offset {offset}')
            else:
                print(f'No more results returned (offset {offset})')
                break
        except Exception as e:
            print(f'Exception at label {label} (offset {offset}) : {e}')

HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

Family name Q101352

                    SELECT DISTINCT ?entity ?entityLabel ?entityAltLabel ?entityDescription
                    WHERE
                    {
                      {
                        SELECT DISTINCT ?entity WHERE {
                          ?entity wdt:P31|wdt:P31/wdt:P279|wdt:P31/wdt:P279/wdt:P279 wd:Q101352;
                                  wdt:P31 ?class
                        }
                        OFFSET 0
                        LIMIT 10000
                      }
                      SERVICE wikibase:label { bd:serviceParam wikibase:language 'en' }
                    }
                    
10000 of results returned at offset 1
10000 of results returned at offset 2
10000 of results returned at offset 3
10000 of results returned at offset 4
10000 of results returned at offset 5
10000 of results returned at offset 6
10000 of results returned at offset 7
10000 of results returned at offset 8
10000 of results returned at offset 9
10000 of results retu

In [225]:
labels_results.keys()

dict_keys(['Organization', 'Name', 'Artist', 'Geolocation', 'City', 'Capital', 'Town', 'Demonym', 'Product', 'Brand', 'Georegion', 'Country', 'Given name', 'Family name'])

In [226]:
for l in labels_results:
    print(l, len(labels_results[l]))

Organization 47
Name 2
Artist 1
Geolocation 155
City 2
Capital 1
Town 3
Demonym 1
Product 4
Brand 1
Georegion 85
Country 1
Given name 8
Family name 37


In [227]:
pickle.dump(labels_results, open('gazetteers_v1.2_family_names_added_results.pickle', 'wb'))
# labels_results = pickle.load(open('gazetteers_v1.0_results.pickle', 'rb'))

In [228]:
labels_results['Name'][0][1]

{'entity': {'type': 'uri',
  'value': 'http://www.wikidata.org/entity/Q17018131'},
 'entityLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Dusty'},
 'entityDescription': {'xml:lang': 'en',
  'type': 'literal',
  'value': 'nickname'}}

In [229]:
entity_labels_data = []

for class_label in tqdm(labels_results):
    results = [x for l in labels_results[class_label] for x in l]
    for result in results:
        entity_labels_data.append({
            'entity_qid': result['entity']['value'].split('/')[-1],
            'entity_class': class_label,
            'entity_label': '' if 'entityLabel' not in result else result['entityLabel']['value'],
            'entity_alt_label': '' if 'entityAltLabel' not in result else result['entityAltLabel']['value'],
            'entity_description':  '' if 'entityDescription' not in result else result['entityDescription']['value'],
        })

HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




In [230]:
df_entity_labels = pd.DataFrame(entity_labels_data)
df_entity_labels

Unnamed: 0,entity_qid,entity_class,entity_label,entity_alt_label,entity_description
0,Q77573108,Organization,NuGrid,"Nucleosynthesis Grid, NuGrid Collaboration",nuclear physics collaboration
1,Q96905010,Organization,ELAN,,research group
2,Q96893030,Organization,MAMBA,Modelling and Analysis for Medical and Biologi...,research group
3,Q57313333,Organization,HCC Lab,Human-Centered Computing Lab,research group at the Institute of Computer Sc...
4,Q61572963,Organization,GAMER group,,research group
...,...,...,...,...,...
6310779,Q101246725,Family name,Macalik,,
6310780,Q101042302,Family name,Roccatagliata,,family name
6310781,Q101246737,Family name,Vukšić,,family name
6310782,Q101042331,Family name,Romitelli,,family name


In [231]:
df_entity_labels.to_csv('csv/entity_labels.csv', index=False)

# Combine Everything Together 

In [73]:
subclasses_df = pd.read_csv('csv/subclasses_df.csv')
df_entity_labels = pd.read_csv('csv/entity_labels.csv')
df_entity_classes = pd.read_csv('csv/entity_classes.csv')

<div class="alert alert-warning">
df_entity_labels contains more entries because the for Geolocation and Georegion it only goes 1 link deeper
</div>

In [232]:
print(len(df_entity_labels))
df_entity_labels.head(3)

6310784


Unnamed: 0,entity_qid,entity_class,entity_label,entity_alt_label,entity_description
0,Q77573108,Organization,NuGrid,"Nucleosynthesis Grid, NuGrid Collaboration",nuclear physics collaboration
1,Q96905010,Organization,ELAN,,research group
2,Q96893030,Organization,MAMBA,Modelling and Analysis for Medical and Biologi...,research group


In [236]:
print(len(df_entity_classes))
df_entity_classes.head(3)

3771933


Unnamed: 0,entity_qid,entity_label,entity_classes
0,Q65968,Organization,Q164950
1,Q68330,Organization,Q11032;Q11032
2,Q68682,Organization,Q7275;Q465613;Q3024240;Q7275;Q465613;Q3024240;...


In [237]:
print(len(subclasses_df))
subclasses_df.head(3)

8850


Unnamed: 0,class_qid,class_label,subclass_label
0,Q79751,Organization,oligarchy
1,Q163740,Organization,nonprofit organization
2,Q193622,Organization,order


In [293]:
subclass2class = dict(zip(subclasses_df.subclass_label, subclasses_df.class_label.str.lower()))
pickle.dump(subclass2class, open('subclass2class.pickle', 'wb'))

In [234]:
qid2class = dict(zip(df_entity_labels.entity_qid, df_entity_labels.entity_class))
qid2label = dict(zip(df_entity_labels.entity_qid, df_entity_labels.entity_label))
qid2altlabel = dict(zip(df_entity_labels.entity_qid, df_entity_labels.entity_alt_label))

In [286]:
subclass2class = dict(zip(subclasses_df.class_qid, subclasses_df.class_label))
subclass2label = dict(zip(subclasses_df.class_qid, subclasses_df.subclass_label))

In [239]:
entity_per_class = {}
not_found = []

for i, entry in tqdm(df_entity_classes.iterrows(), total=len(df_entity_classes)):
    classes = entry['entity_classes']
    label = entry['entity_label']
    qid = entry['entity_qid']
    
    for cls in classes.split(';'):
        if cls not in subclass2label:
            not_found.append(cls)
            cls_label = label.lower()
        else:
            cls_label = subclass2label[cls].lower()
        
        if cls_label not in entity_per_class:
            entity_per_class[cls_label] = set()
        
        entity_per_class[cls_label].add(qid)

HBox(children=(FloatProgress(value=0.0, max=3771933.0), HTML(value='')))




In [240]:
any(subclasses_df.subclass_label == 'male given name')

True

In [241]:
[l for l in entity_per_class.keys() if 'name' in l.lower()]

['pen name',
 'trade name',
 'geographic names board',
 'brand name',
 'legal name',
 'nickname',
 'family name prefixed with mac or mc',
 'family nickname',
 'surname prefixed with ó or o',
 'double surname',
 'toponymic surname',
 'noble family name',
 'name',
 'names of god in judaism',
 'name reaction',
 'code name',
 'name suffix',
 'common name',
 'byname',
 'stage name',
 'placeholder name',
 'male given name',
 'biblical proper name',
 'ship name',
 'female given name',
 'posthumous name',
 'art-name',
 'unisex given name',
 'field name',
 'trivial name',
 'ring name',
 'han surname',
 'association football club names',
 '99 names of allah',
 'former name',
 'unavailable name',
 'fictional spacecraft name',
 'personal name',
 'street or road name',
 'nickname with carnival',
 'names of the greeks',
 'compound given name',
 'commercial name',
 'pub name',
 'gag name',
 'irish name',
 'chinese names for the god of abrahamic religions',
 'superfluous name',
 'toponymic nickname',


In [242]:
subclass2labels = {}

for subclass in entity_per_class:
    subclass2labels[subclass] = set()
    for entity in entity_per_class[subclass]:
        if entity in qid2label and len(qid2label[entity].split(' ')) == 1:
            subclass2labels[subclass].add(qid2label[entity].lower())

In [254]:
for l in subclass2labels:
    if 'abdul' in subclass2labels[l]:
        print(l)

male given name
family name


In [259]:
[(len(es), l) for l, es in sorted(subclass2labels.items(), key=lambda x: -len(x[1]))[:30]]

[(462774, 'human settlement'),
 (313954, 'family name'),
 (99328, 'hill'),
 (96785, 'mountain'),
 (51620, 'business'),
 (38020, 'valley'),
 (34978, 'organization'),
 (32642, 'locality'),
 (28307, 'male given name'),
 (24255, 'geolocation'),
 (18607, 'town in china'),
 (17975, 'library'),
 (17358, 'female given name'),
 (16058, 'enterprise'),
 (14872, 'given name'),
 (14862, 'radio station'),
 (14059, 'georegion'),
 (11143, 'town'),
 (10045, 'city'),
 (9945, 'rural council of ukraine'),
 (8657, 'park'),
 (7878, 'magazine'),
 (7677, 'village'),
 (7665, 'newspaper'),
 (7511, 'summit'),
 (7156, 'software'),
 (6648, 'settlement in galicia, spain'),
 (5773, 'monastery'),
 (5384, 'family'),
 (5313, 'sports club')]

In [270]:
label2subclasses = {}

for subclass in subclass2labels:
    for label in subclass2labels[subclass]:
        if label not in label2subclasses:
            label2subclasses[label] = []
        label2subclasses[label].append(subclass)

In [276]:
label2subclasses['morocco']

['organization',
 'sovereign state',
 'town of the united states',
 'mediterranean country',
 'country',
 'family name']

In [274]:
pickle.dump(subclass2labels, open('subclass2labels.pickle', 'wb'))
pickle.dump(label2subclasses, open('label2subclasses.pickle', 'wb'))

# Pick the final categories

In [277]:
[(len(es), l) for l, es in sorted(subclass2labels.items(), key=lambda x: -len(x[1]))[:300]]

[(462774, 'human settlement'),
 (313954, 'family name'),
 (99328, 'hill'),
 (96785, 'mountain'),
 (51620, 'business'),
 (38020, 'valley'),
 (34978, 'organization'),
 (32642, 'locality'),
 (28307, 'male given name'),
 (24255, 'geolocation'),
 (18607, 'town in china'),
 (17975, 'library'),
 (17358, 'female given name'),
 (16058, 'enterprise'),
 (14872, 'given name'),
 (14862, 'radio station'),
 (14059, 'georegion'),
 (11143, 'town'),
 (10045, 'city'),
 (9945, 'rural council of ukraine'),
 (8657, 'park'),
 (7878, 'magazine'),
 (7677, 'village'),
 (7665, 'newspaper'),
 (7511, 'summit'),
 (7156, 'software'),
 (6648, 'settlement in galicia, spain'),
 (5773, 'monastery'),
 (5384, 'family'),
 (5313, 'sports club'),
 (5062, 'city of the united states'),
 (4890, 'television station'),
 (4747, 'product'),
 (4650, 'buddhist temple'),
 (4472, 'voluntary association'),
 (4436, 'publisher'),
 (4417, 'census-designated place'),
 (4164, 'community health center'),
 (3893, 'name'),
 (3564, 'protected ar