In [24]:
from pymongo import MongoClient
from bson.regex import Regex

In [2]:
# Connect to the MongoDB server at the URI below, then echo the client so the
# cell output shows the connection settings.
MONGO_URI = "mongodb://localhost:27017"
client = MongoClient(MONGO_URI)
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [3]:
# Select the "db_test" database; subscript access is the same lookup as client.db_test.
db = client["db_test"]

# Answers

# Question 1

In [11]:
# The two distinct() calls may return their values in different orders,
# so compare the results as sets rather than as lists.
prize_categories = set(db.prizes.distinct("category"))
laureate_categories = set(db.laureates.distinct("prizes.category"))
assert prize_categories == laureate_categories

# Question 2

In [13]:
# Countries that appear as some laureate's "diedCountry" but never as a "bornCountry"
died_countries = set(db.laureates.distinct('diedCountry'))
born_countries = set(db.laureates.distinct('bornCountry'))
countries = died_countries.difference(born_countries)
print(countries)

{'Israel', 'Barbados', 'Greece', 'USSR', 'Northern Rhodesia (now Zambia)', 'Philippines', 'Tunisia', 'East Germany', 'Czechoslovakia', 'Gabon', 'Jamaica', 'Puerto Rico', 'Yugoslavia (now Serbia)'}


# Question 3

In [14]:
# Collect the distinct countries of laureate affiliation for prizes, then count them
affiliation_countries = db.laureates.distinct('prizes.affiliations.country')
count = len(affiliation_countries)
print(count)

29


# Question 4

In [17]:
# Distinct prize-affiliation countries among laureates whose "bornCountry" is "USA"
born_in_usa = {'bornCountry': 'USA'}
db.laureates.distinct("prizes.affiliations.country", born_in_usa)

['Australia', 'Denmark', 'USA', 'United Kingdom']

# Question 5

In [18]:
# A prize document matches when its "laureates" array has an element at index 2,
# i.e. the prize went to three or more laureates.
criteria = {'laureates.2': {"$exists": True}}

# Categories having at least one prize that satisfies the criteria
triple_play_categories = set(db.prizes.distinct('category', criteria))

# Every category present in the prizes collection, for comparison
all_categories = set(db.prizes.distinct('category'))

# Literature must be the only category absent from the matching set.
assert all_categories.difference(triple_play_categories) == {'literature'}

# Question 6

In [20]:
# Laureates holding an unshared (share "1") physics prize dated "1945" or later.
# Year values are compared as strings.
filter1 = {
    "prizes": {
        "$elemMatch": dict(
            category="physics",
            share="1",
            year={"$gte": "1945"},
        )
    }
}

# Laureates holding a shared (share other than "1") physics prize dated "1945" or later.
filter2 = {
    "prizes": {
        "$elemMatch": dict(
            category="physics",
            share={"$ne": "1"},
            year={"$gte": "1945"},
        )
    }
}

# Ratio of the unshared-prize laureate count to the shared-prize laureate count
single_prize = db.laureates.count_documents(filter1)
multiple_prize = db.laureates.count_documents(filter2)
ratio = single_prize / multiple_prize
print(ratio)

0.1258741258741259


# Question 7

In [21]:
# Save a filter for laureates with unshared prizes
unshared = {
    "prizes": {'$elemMatch': {
        'category': {'$nin': ["physics", "chemistry", "medicine"]},
        "share": "1",
        "year": {'$gte': "1945"},
    }}}

# Save a filter for laureates with shared prizes
shared = {
    "prizes": {'$elemMatch': {
        'category': {'$nin': ["physics", "chemistry", "medicine"]},
        "share": {'$ne': "1"},
        "year": {'$gte': "1945"},
    }}}

ratio = db.laureates.count_documents(unshared) / db.laureates.count_documents(shared)
print(ratio)

1.3653846153846154


# Question 8

In [22]:
# Save a filter for organization laureates with prizes won before 1945
before = {
    'gender': 'org',
    'prizes.year': {'$lt': "1945"},
    }

# Save a filter for organization laureates with prizes won in or after 1945
in_or_after = {
    'gender': 'org',
    'prizes.year': {'$gte': "1945"},
    }

n_before = db.laureates.count_documents(before)
n_in_or_after = db.laureates.count_documents(in_or_after)
ratio = n_in_or_after / (n_in_or_after + n_before)
print(ratio)

0.84


# Question 9

In [26]:
# Count laureates whose first name starts with "G" and whose surname starts with "S"
firstname_pattern = Regex("^G")
surname_pattern = Regex("^S")
db.laureates.count_documents({"firstname": firstname_pattern, "surname": surname_pattern})

9

# Question 10

In [27]:
# Filter for laureates with "Germany" in their "bornCountry" value
criteria = {"bornCountry": Regex("Germany")}
print(set(db.laureates.distinct("bornCountry", criteria)))

{'Germany (now Russia)', 'West Germany (now Germany)', 'Germany', 'Prussia (now Germany)', 'Bavaria (now Germany)', 'Schleswig (now Germany)', 'W&uuml;rttemberg (now Germany)', 'Germany (now Poland)', 'Germany (now France)', 'Mecklenburg (now Germany)', 'East Friesland (now Germany)', 'Hesse-Kassel (now Germany)'}


# Question 11

In [28]:
# Save a filter for laureates with prize motivation values containing "transistor" as a substring
criteria = {'prizes.motivation': Regex('transistor')}

# Save the field names corresponding to a laureate's first name and last name
first, last = 'firstname', 'surname'
print([(laureate[first], laureate[last]) for laureate in db.laureates.find(criteria)])

[('William Bradford', 'Shockley'), ('John', 'Bardeen'), ('Walter Houser', 'Brattain')]
