In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Connection settings, named so they are easy to spot and change
MONGO_PORT = 27017
DB_NAME = 'met'
COLLECTION_NAME = 'artifacts'

# Open a client against the MongoDB server on MONGO_PORT
mongo = MongoClient(port=MONGO_PORT)

# Handle to the database that holds the museum data
db = mongo[DB_NAME]

# Handle to the collection queried by the rest of the notebook
artifacts = db[COLLECTION_NAME]

In [6]:
# Fetch a single document from the collection and display it, to inspect which
# fields are available before building the aggregation pipeline.
# NOTE(review): this cell's recorded execution count is out of sequence with the
# cells around it - confirm the notebook still works with Restart Kernel -> Run All.
artifacts.find_one()

{'_id': ObjectId('64b5c397eb87d19a04edc005'),
 'objectID': 314893,
 'isHighlight': False,
 'accessionNumber': '1987.394.543',
 'accessionYear': '1987',
 'isPublicDomain': True,
 'primaryImage': 'https://images.metmuseum.org/CRDImages/ao/original/VS1987_394_543.jpg',
 'primaryImageSmall': 'https://images.metmuseum.org/CRDImages/ao/web-large/VS1987_394_543.jpg',
 'additionalImages': [],
 'constituents': None,
 'department': 'The Michael C. Rockefeller Wing',
 'objectName': 'Disk',
 'title': 'Animal Disk',
 'culture': 'Peruvian',
 'period': '',
 'dynasty': '',
 'reign': '',
 'portfolio': '',
 'artistRole': '',
 'artistPrefix': '',
 'artistDisplayName': '',
 'artistDisplayBio': '',
 'artistSuffix': '',
 'artistAlphaSort': '',
 'artistNationality': '',
 'artistBeginDate': '',
 'artistEndDate': '',
 'artistGender': '',
 'artistWikidata_URL': '',
 'artistULAN_URL': '',
 'objectDate': 'A.D. 200–900',
 'objectBeginDate': 200,
 'objectEndDate': 900,
 'medium': 'Copper, gilt',
 'dimensions': 'H x

In [3]:
# Build the aggregation pipeline

# Minimum artifact width, in centimetres, for a document to be counted.
# Kept as a named constant so the threshold can be tuned in one place.
MIN_WIDTH_CM = 40

# $match stage: keep only the documents about artifacts that have a width
# greater than or equal to MIN_WIDTH_CM. The dotted path reaches into the
# nested measurements data of each document.
match_query = {'$match': {'measurements.elementMeasurements.Width': {'$gte': MIN_WIDTH_CM}}}

# $group stage: bucket the matched documents by "country" and add 1 per
# document, giving a per-country document count.
group_query = {'$group': {'_id': '$country', 'count': {'$sum': 1}}}

# $sort stage: order the per-country buckets by count, largest first
# (-1 means descending).
# NOTE(review): buckets with equal counts have no guaranteed relative order;
# add a secondary key such as '_id': 1 if a stable order is required.
sort_values = {'$sort': {'count': -1}}

# Stage order matters: filter first, then group, then sort the grouped output.
pipeline = [match_query, group_query, sort_values]


In [4]:
# Execute the pipeline against the collection and materialise every returned
# document into a list, so later cells can count and slice the results.
results = [doc for doc in artifacts.aggregate(pipeline)]

In [5]:
# Each aggregated document is one "country" group, so the length of the list
# is the number of countries in the result.
country_total = len(results)
print("Number of countries in result: ", country_total)

Number of countries in result:  24


In [6]:
# Pretty-print the first ten groups (the pipeline already sorted them by count)
pprint(results[:10])

[{'_id': 'Papua New Guinea', 'count': 232},
 {'_id': 'Indonesia', 'count': 31},
 {'_id': 'Peru', 'count': 13},
 {'_id': 'Mexico', 'count': 8},
 {'_id': 'Australia', 'count': 7},
 {'_id': 'Mali', 'count': 6},
 {'_id': "Côte d'Ivoire", 'count': 5},
 {'_id': 'Democratic Republic of the Congo', 'count': 5},
 {'_id': 'Malaysia', 'count': 4},
 {'_id': 'Ghana', 'count': 4}]


In [7]:
# Load the aggregation output into a pandas DataFrame (one row per group)
result_df = pd.DataFrame(data=results)

# Report how many rows were loaded, then preview the first ten
row_total = result_df.shape[0]
print("Rows in DataFrame: ", row_total)
result_df.head(10)

Rows in DataFrame:  24


Unnamed: 0,_id,count
0,Papua New Guinea,232
1,Indonesia,31
2,Peru,13
3,Mexico,8
4,Australia,7
5,Mali,6
6,Côte d'Ivoire,5
7,Democratic Republic of the Congo,5
8,Malaysia,4
9,Ghana,4


Data Source: [The Metropolitan Museum of Art](https://www.metmuseum.org/) (2022). [The Metropolitan Museum of Art Collection API](https://metmuseum.github.io/). Licensed under the [Creative Commons 0 License](https://creativecommons.org/publicdomain/zero/1.0/).<br />
Accessed Oct 3, 2022. Data collected from departmentId=5 ("Arts of Africa, Oceania, and the Americas") and search string "animal".