In [1]:
import pymongo
import pprint

In [2]:
# Replace XXXX with your connection URI from the Atlas UI
free_tier_client = pymongo.MongoClient(XXX)

In [3]:
# We're using the people-raw dataset from the Cleansing Data with Updates assessment
people = free_tier_client.cleansing["people-raw"]


In [4]:
# This is a helper function to reduce the output of explain to a few key metrics
def distilled_explain(explain_output):
    return {
        'executionTimeMillis': explain_output['executionStats']['executionTimeMillis'],
        'totalDocsExamined'  : explain_output['executionStats']['totalDocsExamined'],
        'nReturned'          : explain_output['executionStats']['nReturned']
    }

In [5]:
query_1_stats = people.find({
  "address.state": "Nebraska",
  "last_name": "Miller",
}).explain()

query_2_stats = people.find({
  "first_name": "Harry",
  "last_name": "Reed"
}).explain()

In [6]:
# This is to provide a baseline for how long it takes to execute these queries
print(distilled_explain(query_1_stats))
print(distilled_explain(query_2_stats))

{'executionTimeMillis': 69, 'totalDocsExamined': 50474, 'nReturned': 6}
{'executionTimeMillis': 25, 'totalDocsExamined': 50474, 'nReturned': 1}


In [7]:
# Replace "YYYY" with the best index to increase the performance of the two queries above
people.create_index([("first_name", pymongo.ASCENDING), ("last_name", pymongo.ASCENDING)])

'first_name_1_last_name_1'

In [8]:
query_1_stats = people.find({
  "address.state": "Nebraska",
  "last_name": "Miller",
}).explain()

query_2_stats = people.find({
  "first_name": "Harry",
  "last_name": "Reed"
}).explain()

In [9]:
# If everything went well, both queries should now have *much* lower execution times and documents examined
print(distilled_explain(query_1_stats))
print(distilled_explain(query_2_stats))

{'executionTimeMillis': 30, 'totalDocsExamined': 50474, 'nReturned': 6}
{'executionTimeMillis': 1, 'totalDocsExamined': 1, 'nReturned': 1}


In [None]:
# we can see the time optimazation that occured
#  69ms --> 30ms, 25ms --> 1ms