In [20]:
from pymongo import MongoClient
from operator import itemgetter

In [19]:
# Connect to the local MongoDB server and select the database used by every cell below.
client = MongoClient("mongodb://localhost:27017")
db = client["db_test"]

# Projection in MongoDB

## Find returns a cursor (iterator)

In [7]:
# In a projection, 1 means "include this field" and 0 means "exclude it".
# Here only the nested prizes.affiliations field is kept and the default _id is dropped.
docs = db.laureates.find(
    filter={},
    projection={"prizes.affiliations": 1, "_id": 0},
)

In [4]:
# find() hands back a cursor (an iterator), so next() pulls a single document from it.
first_doc = next(docs)
print(first_doc)

{'prizes': [{'affiliations': [{'name': 'University of Pennsylvania', 'city': 'Philadelphia, PA', 'country': 'USA'}]}]}


# Missing fields

In [13]:
# Remember: only projected fields that actually exist in a document are returned.
# The sample output for organisations shows "firstname" (plus the default "_id")
# and no "bornCountry" key.
docs = db.laureates.find(
    filter={"gender": "org"},
    projection=["bornCountry", "firstname"],
    limit=2,  # let the server stop after two documents instead of slicing a fully fetched list
)
list(docs)

[{'_id': ObjectId('65550468d1fc86246b056ee7'),
  'firstname': 'Comité international de la Croix Rouge (International Committee of the Red Cross)'},
 {'_id': ObjectId('65550468d1fc86246b056f39'),
  'firstname': 'Friends Service Council (The Quakers)'}]

# Simple aggregation with pure python

In [16]:
# Pure-Python aggregation: total number of prizes over all laureate documents.
docs = db.laureates.find({}, projection=["prizes"])
# A generator expression feeds sum() directly, with no intermediate list.
sum(len(doc["prizes"]) for doc in docs)

941

In [17]:
# Materialise the physics prizes (only "year" projected) so the list can be reused below.
physics_cursor = db.prizes.find({"category": "physics"}, ["year"])
docs = list(physics_cursor)
# Show the years of the first five documents, in the order the server returned them.
print([doc["year"] for doc in docs[:5]])

['2018', '2012', '2011', '2014', '2015']


## sorting a list with dictionaries

In [23]:
# Sort the already-fetched documents client-side, newest year first.
# Years are stored as strings, so this is a lexicographic comparison.
by_year = itemgetter("year")
sorted_docs = sorted(docs, key=by_year, reverse=True)

In [24]:
# Years of the five most recent physics prizes after the client-side sort.
print([doc["year"] for doc in sorted_docs[:5]])

['2018', '2017', '2016', '2015', '2014']


## itemgetter in detail

In [26]:
# itemgetter("value") builds a callable that behaves like `lambda d: d["value"]`.
dic = {"value": 20}
func = itemgetter("value")
func(dic)

20

## sorting a list with dictionaries - controlled example

In [31]:
# Controlled example: two products sorted by price, most expensive first.
store = [
    {"item": "Nike Revolution 6 Next Nature", "value": 249.99},
    {"item": "Nike Air Force 1", "value": 799.99},
]
# The key function extracts the "value" field from each dict.
key = itemgetter("value")
sorted_store = sorted(store, key=key, reverse=True)
sorted_store

[{'item': 'Nike Air Force 1', 'value': 799.99},
 {'item': 'Nike Revolution 6 Next Nature', 'value': 249.99}]

# Primary and secondary sorting

In [32]:
# Sort directions: 1 means ascending and -1 means descending.
# Primary key is "year" (ascending); ties are broken by "category" (descending).
# Years are stored as strings, so $gt/$lt compare them as strings.
prizes_1967_to_1969 = db.prizes.find(
    {"year": {"$gt": "1966", "$lt": "1970"}},
    ["category", "year"],
    sort=[("year", 1), ("category", -1)],
)
for doc in prizes_1967_to_1969:
    print("{year} {category}".format(**doc))

1967 physics
1967 medicine
1967 literature
1967 chemistry
1968 physics
1968 peace
1968 medicine
1968 literature
1968 chemistry
1969 physics
1969 peace
1969 medicine
1969 literature
1969 economics
1969 chemistry


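## Note: JavaScript objects retain key order as entered, which is why the sort specification above is written as a list of `(field, direction)` tuples rather than a plain dict. Python dicts only guarantee insertion order from Python 3.7 onward; on older versions, if this behavior is desired, use [OrderedDict](https://docs.python.org/3/library/collections.html#collections.OrderedDict)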

# MongoDB indexation

## Before indexation

In [34]:
%%timeit
# Baseline timing: equality match on "year", run before the "year" index is created below.
# list() drains the cursor so the whole fetch is included in the measurement.
docs = list(db.prizes.find({"year": "1901"}))

1.13 ms ± 144 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## After indexation

In [35]:
# Single-field ascending index on "year"; the cell displays the value create_index returns.
year_index_keys = [("year", 1)]
db.prizes.create_index(year_index_keys)

'year_1'

In [37]:
%%timeit
# Same query as the baseline cell, timed again now that the "year" index exists.
# list() drains the cursor so the whole fetch is included in the measurement.
docs = list(db.prizes.find({"year": "1901"}))

The slowest run took 4.36 times longer than the fastest. This could mean that an intermediate result is being cached.
398 µs ± 198 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Adding multiple indexation

In [38]:
# Compound index: "category" first, then "year", both ascending.
category_year_keys = [("category", 1), ("year", 1)]
db.prizes.create_index(category_year_keys)

'category_1_year_1'

In [44]:
# Earliest economics prize. find_one() with a sort returns only the first match,
# so the full result set is no longer fetched just to take element [0].
# With no matching document this yields None instead of raising IndexError.
db.prizes.find_one({"category": "economics"}, sort=[("year", 1)])


{'_id': ObjectId('6555048619e46de1de2d02f8'),
 'year': '1969',
 'category': 'economics',
 'laureates': [{'id': '677',
   'firstname': 'Ragnar',
   'surname': 'Frisch',
   'motivation': '"for having developed and applied dynamic models for the analysis of economic processes"',
   'share': '2'},
  {'id': '678',
   'firstname': 'Jan',
   'surname': 'Tinbergen',
   'motivation': '"for having developed and applied dynamic models for the analysis of economic processes"',
   'share': '2'}]}

## Getting available indexes

In [45]:
# List the indexes defined on the laureates collection (the indexes created above are on prizes).
laureate_indexes = db.laureates.index_information()
laureate_indexes

{'_id_': {'v': 2, 'key': [('_id', 1)]}}

# Understanding the queries

In [46]:
# Ask the server how it would run this query rather than fetching the results.
marie_query = db.laureates.find(
    {"firstname": "Marie"},
    {"bornCountry": 1, "_id": 0},
)
marie_query.explain()

{'explainVersion': '2',
 'queryPlanner': {'namespace': 'db_test.laureates',
  'indexFilterSet': False,
  'parsedQuery': {'firstname': {'$eq': 'Marie'}},
  'queryHash': 'D45B05FF',
  'planCacheKey': '966CA941',
  'maxIndexedOrSolutionsReached': False,
  'maxIndexedAndSolutionsReached': False,
  'maxScansToExplodeReached': False,
  'winningPlan': {'queryPlan': {'stage': 'PROJECTION_SIMPLE',
    'planNodeId': 2,
    'transformBy': {'bornCountry': True, '_id': False},
    'inputStage': {'stage': 'COLLSCAN',
     'planNodeId': 1,
     'filter': {'firstname': {'$eq': 'Marie'}},
     'direction': 'forward'}},
   'slotBasedPlan': {'slots': '$$RESULT=s8 env: { s3 = 1700420515627 (NOW), s2 = Nothing (SEARCH_META), s7 = "Marie", s1 = TimeZoneDatabase(Europe/Berlin...America/Porto_Acre) (timeZoneDB) }',
    'stages': '[2] mkbson s8 s5 [bornCountry] keep [] true false \n[1] filter {traverseF(s4, lambda(l1.0) { ((l1.0 == s7) ?: false) }, false)} \n[1] scan s5 s6 none none none none lowPriority [s4 =

# Adding limits

In [47]:
# Cap the result at three prizes that have a laureate whose share is "3"
# (presumably a one-third share; confirm against the data).
share_filter = {"laureates.share": "3"}
for prize in db.prizes.find(share_filter, limit=3):
    print("{year} {category}".format(**prize))

2013 chemistry
2013 medicine
2013 economics


# Skipping and paging through the results

In [49]:
# Page 1 (three per page). No sort is given, so the page contents depend on
# whatever order the server returns the documents in.
first_page = db.prizes.find({"laureates.share": "3"}, limit=3)
for prize in first_page:
    print("{year} {category}".format(**prize))

2013 chemistry
2013 medicine
2013 economics


In [51]:
# Page 3: skip the first two pages (2 x 3 documents), then take three.
third_page = db.prizes.find({"laureates.share": "3"}, skip=6, limit=3)
for prize in third_page:
    print("{year} {category}".format(**prize))

2014 physics
2016 chemistry
2015 chemistry


In [52]:
# Page 2: skip the first page (3 documents), then take three.
second_page = db.prizes.find({"laureates.share": "3"}, skip=3, limit=3)
for prize in second_page:
    print("{year} {category}".format(**prize))

2011 peace
2010 chemistry
2008 chemistry


# Using cursor methods for {sort, skip, limit}

In [54]:
# Same first page as before, with limit applied as a cursor method instead of a find() keyword.
limited_cursor = db.prizes.find({"laureates.share": "3"}).limit(3)
for prize in limited_cursor:
    print("{year} {category}".format(**prize))

2013 chemistry
2013 medicine
2013 economics


In [55]:
# Second page via chained cursor methods: skip three documents, then take three.
second_page_cursor = db.prizes.find({"laureates.share": "3"}).skip(3).limit(3)
for prize in second_page_cursor:
    print("{year} {category}".format(**prize))

2011 peace
2010 chemistry
2008 chemistry


In [58]:
# Second page of the results ordered by ascending year.
# The parentheses already allow line breaks, so no backslash continuations are needed.
sorted_second_page = (
    db.prizes.find({"laureates.share": "3"})
    .sort([("year", 1)])
    .skip(3)
    .limit(3)
)
for prize in sorted_second_page:
    print("{year} {category}".format(**prize))

1954 medicine
1956 physics
1956 medicine


# More examples

In [60]:
# Three spellings of the same ascending sort on "year":
# a list of (field, direction) tuples, a field plus a direction, and a bare field name.
share_filter = {"laureates.share": "3"}
cursor1 = db.prizes.find(share_filter).skip(3).limit(3).sort([("year", 1)])
cursor2 = db.prizes.find(share_filter).skip(3).limit(3).sort("year", 1)
cursor3 = db.prizes.find(share_filter).skip(3).limit(3).sort("year")

In [61]:
# Drain the first cursor, then check that the other two sort spellings give identical documents.
docs = list(cursor1)
assert docs == list(cursor2) == list(cursor3)
for prize in docs:
    print("{year} {category}".format(**prize))

1954 medicine
1956 physics
1956 medicine


In [62]:
# find_one accepts the same skip/sort options as find and returns a single document:
# here the fourth match when ordered by ascending year.
doc = db.prizes.find_one(
    {"laureates.share": "3"},
    skip=3,
    sort=[("year", 1)],
)
print("{year} {category}".format(**doc))

1954 medicine


# Exercises

1) Given the query:
`db.laureates.find_one({"prizes": {"$elemMatch": {"category": "physics", "year": "1903"}}})
`, build a projection that fetches ONLY the laureates' full names and prize share info. I encourage you to experiment with the console and re-familiarize yourself with the structure of the documents in the laureates collection.

2) Collect the full names of laureates whose initials are "G.S."

3) Check whether the shares sum to 100%

4) For each document, return the fields in the following format: `year: surname1, surname2, ... and surnameN`

5) Find the first five prizes with one or more laureates sharing 1/4 of the prize. Project the prize category, year, and laureates' motivations.

6) You and a friend want to set up a website that gives information on Nobel laureates with awards relating to particle phenomena. You want to present these laureates one page at a time, with three laureates per page. You decide to order the laureates chronologically by award year. When there is a "tie" in ordering (i.e. two laureates were awarded prizes in the same year), you want to order them alphabetically by surname.