# Explore PAN datasets 
Find out whether: 
- all PAN datasets have consistent entries for municipalities (`dansTemporalSpatial:dansSpatialCoverageText`).
  - answer: YES. 
    All PAN datasets have an entry for SpatialCoverageText, and it is always a pair of the form (Municipality, NLD). 
    In some cases, a number is given instead of a municipality name; these can be filtered out or converted. 

 

In [5]:
import sys
# Specify the path to the scripts folder
sys.path.append('../scripts/')
from mongodb import load_data

In [6]:
# Get the collection handle from the project's mongodb helper module
collection = load_data()

In [7]:
# Check what kind of object load_data() returned
type(collection)

pymongo.collection.Collection

### Find out how many times each municipality occurs in the PAN datasets 

In [8]:
# Count how often each spatial-coverage value occurs among the PAN datasets
# and collect the first element of each value (the municipality name).

# Step 1: Restrict to datasets whose author name matches the PAN project
filter_field = "ore:describes.author.citation:authorName"
filter_value = "Portable Antiquities of the Netherlands"

# Step 2: The field whose distinct values are counted
group_field = "ore:describes.dansTemporalSpatial:dansSpatialCoverageText"

pipeline = [
    {"$match": {filter_field: filter_value}},  # Keep only matching documents
    {"$group": {"_id": f"${group_field}", "count": {"$sum": 1}}},  # One bucket per distinct value
    {"$sort": {"count": -1}},  # Most frequent value first
]

# Execute the pipeline
result = collection.aggregate(pipeline)

placenames = []

# Print every bucket and collect the place name of each well-formed one
for item in result:
    coverage = item['_id']
    print(f"Unique Value: {coverage}, Count: {item['count']}")
    # $group collects documents that lack the field into a bucket whose _id
    # is None; indexing that (or an empty list) would abort the loop, and a
    # plain string would contribute only its first character. Only non-empty
    # list values therefore yield a place name.
    if isinstance(coverage, (list, tuple)) and coverage:
        plaats = coverage[0]
        placenames.append(plaats)

Unique Value: ['Bunnik', 'NLD'], Count: 8646
Unique Value: ['Zaltbommel', 'NLD'], Count: 5167
Unique Value: ['Overbetuwe', 'NLD'], Count: 5121
Unique Value: ['Waadhoeke', 'NLD'], Count: 3776
Unique Value: ['Buren', 'NLD'], Count: 3021
Unique Value: ['West Betuwe', 'NLD'], Count: 2403
Unique Value: ['Noardeast-Fryslân', 'NLD'], Count: 2103
Unique Value: ['Grave', 'NLD'], Count: 2014
Unique Value: ['Tiel', 'NLD'], Count: 1986
Unique Value: ['Súdwest-Fryslân', 'NLD'], Count: 1594
Unique Value: ['Texel', 'NLD'], Count: 1568
Unique Value: ['Maasdriel', 'NLD'], Count: 1547
Unique Value: ['Het Hogeland', 'NLD'], Count: 1375
Unique Value: ['Neder-Betuwe', 'NLD'], Count: 1231
Unique Value: ['Schouwen-Duiveland', 'NLD'], Count: 1116
Unique Value: ['Leeuwarden', 'NLD'], Count: 1046
Unique Value: ['Utrecht', 'NLD'], Count: 1023
Unique Value: ["'s-Hertogenbosch", 'NLD'], Count: 995
Unique Value: ['Sint Anthonis', 'NLD'], Count: 982
Unique Value: ['Goes', 'NLD'], Count: 938
Unique Value: ['Neerijnen

In [5]:
# TODO: place names containing a number are not removed here yet;
# this cell only prints how many entries were collected.

print(len(placenames))

368


In [7]:
# Show the collected place names for visual inspection
print(placenames)

['Bunnik', 'Zaltbommel', 'Overbetuwe', 'Waadhoeke', 'Buren', 'West Betuwe', 'Noardeast-Fryslân', 'Grave', 'Tiel', 'Súdwest-Fryslân', 'Texel', 'Maasdriel', 'Het Hogeland', 'Neder-Betuwe', 'Schouwen-Duiveland', 'Leeuwarden', "'s-Hertogenbosch", 'Utrecht', 'Sint Anthonis', 'Goes', 'Neerijnen', 'Zevenaar', 'Veere', 'Geldermalsen', 'Amsterdam', 'Maastricht', 'Houten', 'Oss', 'Nijmegen', 'Wijk bij Duurstede', 'Schagen', 'Alphen aan den Rijn', 'Reimerswaal', 'Den Helder', 'Sint-Michielsgestel', 'Sittard-Geleen', 'Meerssen', 'Katwijk', 'Harlingen', 'Bergen (NH.)', 'Dronten', 'De Fryske Marren', 'Harderwijk', 'Alkmaar', 'Loppersum', 'Duiven', 'Middelburg', 'Veldhoven', 'Maasgouw', 'Valkenburg aan de Geul', 'Berg en Dal', 'Deventer', 'Hollands Kroon', 'Roermond', 'Dongeradeel', 'Lochem', 'Berkelland', 'Leudal', 'Beesel', 'Bladel', 'Ouder-Amstel', 'Drechterland', 'Zutphen', 'Beuningen', 'Westerkwartier', 'Druten', 'Meierijstad', 'Breda', 'Ferwerderadiel', 'Cuijk', 'Castricum', 'Tytsjerksteradiel'

In [None]:
# # Write to .txt file 
# with open('../../data/plaatsnamen.txt', 'w') as f:
#     for item in placenames:
#         f.write("%s\n" % item)