In [1]:
#!pip install pymongo

In [2]:
from pymongo import MongoClient

In [3]:
client = MongoClient("mongodb://localhost:27017")
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

# Listing Database names

In [4]:
print(client.list_database_names())

['admin', 'config', 'db_test', 'local']


# Using as a method

In [5]:
db = client.db_test
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'db_test')

# Using the "dictionary" mode

In [6]:
client['db_test']

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'db_test')

# Listing the collections

In [7]:
client.db_test.list_collection_names()

['laureates', 'prizes']

# Getting the collections

In [8]:
laureates_collection = db["laureates"]
prize_collection = db['prizes']

# Counting the number of registers

In [9]:
filter_dict = {}

In [10]:
laureates_collection.count_documents(filter_dict)

934

In [11]:
prize_collection.count_documents(filter_dict)

590

# Getting just one document

In [12]:
laureates_collection.find_one(filter_dict)

{'_id': ObjectId('65550468d1fc86246b056dae'),
 'id': '96',
 'firstname': 'John Robert',
 'surname': 'Schrieffer',
 'born': '1931-05-31',
 'died': '0000-00-00',
 'bornCountry': 'USA',
 'bornCountryCode': 'US',
 'bornCity': 'Oak Park, IL',
 'gender': 'male',
 'prizes': [{'year': '1972',
   'category': 'physics',
   'share': '3',
   'motivation': '"for their jointly developed theory of superconductivity, usually called the BCS-theory"',
   'affiliations': [{'name': 'University of Pennsylvania',
     'city': 'Philadelphia, PA',
     'country': 'USA'}]}]}

In [13]:
prize_collection.find_one(filter_dict)

{'_id': ObjectId('6555048619e46de1de2d01d3'),
 'year': '2018',
 'category': 'medicine',
 'laureates': [{'id': '958',
   'firstname': 'James P.',
   'surname': 'Allison',
   'motivation': '"for their discovery of cancer therapy by inhibition of negative immune regulation"',
   'share': '2'},
  {'id': '959',
   'firstname': 'Tasuku',
   'surname': 'Honjo',
   'motivation': '"for their discovery of cancer therapy by inhibition of negative immune regulation"',
   'share': '2'}]}

# Getting the documents fields

In [14]:
laureates_sample = laureates_collection.find_one(filter_dict)
print(list(laureates_sample.keys()))

['_id', 'id', 'firstname', 'surname', 'born', 'died', 'bornCountry', 'bornCountryCode', 'bornCity', 'gender', 'prizes']


In [15]:
prize_sample = prize_collection.find_one(filter_dict)
print(list(prize_sample.keys()))

['_id', 'year', 'category', 'laureates']


# Exploring the filter_dicts

In [16]:
filter_dict = {'bornCountry':'Japan',
               'bornCity':'Osaka'}
laureates_collection.find_one(filter_dict)

{'_id': ObjectId('65550468d1fc86246b056e88'),
 'id': '97',
 'firstname': 'Leo',
 'surname': 'Esaki',
 'born': '1925-03-12',
 'died': '0000-00-00',
 'bornCountry': 'Japan',
 'bornCountryCode': 'JP',
 'bornCity': 'Osaka',
 'gender': 'male',
 'prizes': [{'year': '1973',
   'category': 'physics',
   'share': '4',
   'motivation': '"for their experimental discoveries regarding tunneling phenomena in semiconductors and superconductors, respectively"',
   'affiliations': [{'name': 'IBM Thomas J. Watson Research Center',
     'city': 'Yorktown Heights, NY',
     'country': 'USA'}]}]}

## Composite filters

In [17]:
filter_dict = {'prizes': [{'year': '1973',
   'category': 'physics',
   'share': '4',
   'motivation': '"for their experimental discoveries regarding tunneling phenomena in semiconductors and superconductors, respectively"',
   'affiliations': [{'name': 'IBM Thomas J. Watson Research Center',
     'city': 'Yorktown Heights, NY',
     'country': 'USA'}]}]}
laureates_collection.find_one(filter_dict)

{'_id': ObjectId('65550468d1fc86246b056e88'),
 'id': '97',
 'firstname': 'Leo',
 'surname': 'Esaki',
 'born': '1925-03-12',
 'died': '0000-00-00',
 'bornCountry': 'Japan',
 'bornCountryCode': 'JP',
 'bornCity': 'Osaka',
 'gender': 'male',
 'prizes': [{'year': '1973',
   'category': 'physics',
   'share': '4',
   'motivation': '"for their experimental discoveries regarding tunneling phenomena in semiconductors and superconductors, respectively"',
   'affiliations': [{'name': 'IBM Thomas J. Watson Research Center',
     'city': 'Yorktown Heights, NY',
     'country': 'USA'}]}]}

# Couting with filters

In [18]:
filter_dict = {'year': '2018','category':'physics'}
prize_collection.count_documents(filter_dict)

1

# Query operators

## in

In [19]:
filter_dict = {'diedCountry': {'$in':['France','USA']}}
laureates_collection.count_documents(filter_dict)

259

## ne

In [20]:
filter_dict = {'diedCountry': {'$ne':['France','USA']}}
laureates_collection.count_documents(filter_dict)

934

## gt (>), gte(>=), lt (<), lte (<=)

In [21]:
filter_dict = {
'diedCountry': {
'$gt': 'Belgium',
'$lte': 'USA'}
}
laureates_collection.count_documents(filter_dict)

455

# Dot notation

## Checking the structure

In [26]:
laureates_collection.find_one({
"firstname": "Walter",
"surname": "Kohn"})

{'_id': ObjectId('65550468d1fc86246b056f24'),
 'id': '290',
 'firstname': 'Walter',
 'surname': 'Kohn',
 'born': '1923-03-09',
 'died': '2016-04-19',
 'bornCountry': 'Austria',
 'bornCountryCode': 'AT',
 'bornCity': 'Vienna',
 'diedCountry': 'USA',
 'diedCountryCode': 'US',
 'diedCity': 'Santa Barbara, CA',
 'gender': 'male',
 'prizes': [{'year': '1998',
   'category': 'chemistry',
   'share': '2',
   'motivation': '"for his development of the density-functional theory"',
   'affiliations': [{'name': 'University of California',
     'city': 'Santa Barbara, CA',
     'country': 'USA'}]}]}

## Accessing the affiliations name with dot notation

In [24]:
filter_dict = {'prizes.affiliations.name':(
"University of California")}
laureates_collection.count_documents(filter_dict)

34

In [25]:
filter_dict = {"prizes.affiliations.city":('Berkeley, CA')}
laureates_collection.count_documents(filter_dict)

19

# Using the exists operator

In [30]:
criteria = {"bornCountry": {"$exists": False}}
laureates_collection.count_documents(criteria)

33

## Checking the missing fields: 

In [31]:
laureates_collection.find_one(criteria)

{'_id': ObjectId('65550468d1fc86246b056ee7'),
 'id': '482',
 'firstname': 'Comité international de la Croix Rouge (International Committee of the Red Cross)',
 'born': '0000-00-00',
 'died': '0000-00-00',
 'gender': 'org',
 'prizes': [{'year': '1917',
   'category': 'peace',
   'share': '1',
   'affiliations': [[]]},
  {'year': '1944', 'category': 'peace', 'share': '1', 'affiliations': [[]]},
  {'year': '1963', 'category': 'peace', 'share': '2', 'affiliations': [[]]}]}

# Exploring lists of documents: checking multiple prizes

## Checking people with at least one prize

In [33]:
laureates_collection.count_documents({"prizes.0": {"$exists": True}})

934

## Checking people with two or more prizes

In [34]:
laureates_collection.count_documents({"prizes.1": {"$exists": True}})

6

## Checking how it looks like

In [40]:
multi_prize = laureates_collection.find_one({"prizes.1": {"$exists": True}})
multi_prize

{'_id': ObjectId('65550468d1fc86246b056dec'),
 'id': '217',
 'firstname': 'Linus Carl',
 'surname': 'Pauling',
 'born': '1901-02-28',
 'died': '1994-08-19',
 'bornCountry': 'USA',
 'bornCountryCode': 'US',
 'bornCity': 'Portland, OR',
 'diedCountry': 'USA',
 'diedCountryCode': 'US',
 'diedCity': 'Big Sur, CA',
 'gender': 'male',
 'prizes': [{'year': '1954',
   'category': 'chemistry',
   'share': '1',
   'motivation': '"for his research into the nature of the chemical bond and its application to the elucidation of the structure of complex substances"',
   'affiliations': [{'name': 'California Institute of Technology (Caltech)',
     'city': 'Pasadena, CA',
     'country': 'USA'}]},
  {'year': '1962',
   'category': 'peace',
   'share': '1',
   'affiliations': [{'name': 'California Institute of Technology (Caltech)',
     'city': 'Pasadena, CA',
     'country': 'USA'}]}]}

# Exercises

1) The database has two collections, prizes and laureates. In the prizes collection, every document correspond to a single Nobel prize, and in the laureates collection - to a single Nobel laureate. Which one of the alternatives is correct?
- #laureates = #prizes
- #laureates>prizes
- #prizes>#laureates

2)  Save a list of names of the databases managed by client and a list of names of the collections managed by the "nobel" database. Print both results

3) List _all_ fields of both collections. If a field has subfields or an array of fields, save a tuple with field and subfield

4) What is the number of laureates born prior to 1800? What about prior to 1700?



5) Create a filter for :
- Germany-born laureates
- Germany-born laureates who died in the USA 
- Germany-born laureates who died in the USA and with the first name "Albert"

6) How many laureates were born in USA, Canada or Mexico? More than that, how many died in the USA but were not born there?

7) If we want to count the number of laureates born in Austria with a prize affiliation country that is not also Austria, what MongoDB concepts/tools should we use?



8) Count the number of laureates born in Austria with a prize affiliation country that is not also Austria.

9) Check if there is any winner without a born field. After that use a filter document to find a document with at least three elements in its prizes array