In [5]:
from pymongo import MongoClient
import json
import urllib.parse

# Read the Atlas credentials from a local JSON file so that no secret is
# hardcoded in the notebook. The file must provide the keys 'username',
# 'password' and 'host'.
with open('credentials_mongodb.json') as f:
    login = json.load(f)

# Both the username and the password must be percent-escaped before they are
# embedded in the connection string: reserved characters such as ':', '/',
# '?', '#', '[', ']' or '@' would otherwise be parsed as URI delimiters.
# quote_plus is used instead of quote because quote leaves '/' unescaped by
# default.
username = urllib.parse.quote_plus(login['username'])
password = urllib.parse.quote_plus(login['password'])
host = login['host']
url = "mongodb+srv://{}:{}@{}/?retryWrites=true&w=majority".format(username, password, host)

# Client handle used by the query cells below.
client = MongoClient(url)

### 1.1


List the databases that exist on your MongoDB Atlas cluster after loading sample databases. 

In [9]:
# Ask the cluster for the names of all databases it currently hosts.
database_names = client.list_database_names()
database_names

['sample_airbnb',
 'sample_analytics',
 'sample_geospatial',
 'sample_guides',
 'sample_mflix',
 'sample_restaurants',
 'sample_supplies',
 'sample_training',
 'sample_weatherdata',
 'admin',
 'local']

### 1.2


List the collections stored in the `sample_mflix` and `sample_airbnb` databases. You can do this either by checking out the databases using Compass or by using `pymongo`'s `.list_collection_names()` method.

In [12]:
# Collections of the `sample_mflix` database (attribute access on the client
# selects the same database as subscripting with its name).
client.sample_mflix.list_collection_names()

['users', 'theaters', 'comments', 'movies', 'sessions', 'embedded_movies']

In [13]:
# Collections of the `sample_airbnb` database.
client['sample_airbnb'].list_collection_names()

['listingsAndReviews']

### 2.1


Retrieve one (random) document associated with a movie produced in 2015.


In [17]:
# Fetch a single document whose production year is 2015.
produced_in_2015 = {'year': 2015}
client.sample_mflix.movies.find_one(produced_in_2015)

{'_id': ObjectId('573a13adf29313caabd2b765'),
 'plot': "A new theme park is built on the original site of Jurassic Park. Everything is going well until the park's newest attraction--a genetically modified giant stealth killing machine--escapes containment and goes on a killing spree.",
 'genres': ['Action', 'Adventure', 'Sci-Fi'],
 'runtime': 124,
 'metacritic': 59,
 'rated': 'PG-13',
 'cast': ['Chris Pratt',
  'Bryce Dallas Howard',
  'Irrfan Khan',
  "Vincent D'Onofrio"],
 'num_mflix_comments': 0,
 'poster': 'https://m.media-amazon.com/images/M/MV5BNzQ3OTY4NjAtNzM5OS00N2ZhLWJlOWUtYzYwZjNmOWRiMzcyXkEyXkFqcGdeQXVyMTMxODk2OTU@._V1_SY1000_SX677_AL_.jpg',
 'title': 'Jurassic World',
 'fullplot': '22 years after the original Jurassic Park failed, the new park (also known as Jurassic World) is open for business. After years of studying genetics the scientists on the park genetically engineer a new breed of dinosaur. When everything goes horribly wrong, will our heroes make it off the island

### 2.2


Retrieve all TV series produced in 1995.


In [20]:
# Inspect which values the `type` field takes, to see how TV series are
# labelled in the collection.
client.sample_mflix.movies.distinct('type')

['movie', 'series']

In [21]:
# Every document labelled as a series with production year 1995.
series_1995_cursor = client.sample_mflix.movies.find({'type': 'series', 'year': 1995})
list(series_1995_cursor)

[{'_id': ObjectId('573a1399f29313caabceee5e'),
  'plot': 'Captain Woodrow Call, now retired from the Rangers, is a bounty hunter. He is hired by an eastern rail baron to track down Joey Garza, a new kind of killer, only a boy, who kills from a ...',
  'genres': ['Drama', 'Western'],
  'runtime': 300,
  'cast': ['James Garner', 'Sissy Spacek', 'Sam Shepard', 'Ned Beatty'],
  'poster': 'https://m.media-amazon.com/images/M/MV5BMjA5MTc2NjU5NF5BMl5BanBnXkFtZTcwODUzNjUyMQ@@._V1_SY1000_SX677_AL_.jpg',
  'title': 'Streets of Laredo',
  'fullplot': "Captain Woodrow Call, now retired from the Rangers, is a bounty hunter. He is hired by an eastern rail baron to track down Joey Garza, a new kind of killer, only a boy, who kills from a distance with a rifle. Joined by his old compadre Pea Eye, it is a long ride to south Texas and the Mexican side of the border, where the past, in the form of Maria Garza, Joey's mother, haunts Call.",
  'languages': ['English'],
  'released': datetime.datetime(1995,

### 2.3


Retrieve the title and cast of movies produced in 2010, but limit your results to 5 documents.

> **Note:** Don't forget to exclude the `_id` field from your returned documents.


In [24]:
# Title and cast of five movies from 2010. `_id` is returned by default, so
# it is switched off explicitly in the projection.
movies_of_2010 = {'year': 2010, 'type': 'movie'}
title_and_cast = {'title': 1, 'cast': 1, '_id': 0}
list(client.sample_mflix.movies.find(movies_of_2010, title_and_cast).limit(5))

[{'cast': ['èva Gèbor', 'Istvèn Znamenèk', 'èkos Horvèth', 'Lia Pokorny'],
  'title': 'Pèl Adrienn'},
 {'title': 'In My Sleep',
  'cast': ['Philip Winchester',
   'Tim Draxl',
   'Lacey Chabert',
   'Abigail Spencer']},
 {'cast': ['Mandy Moore', 'Zachary Levi', 'Donna Murphy', 'Ron Perlman'],
  'title': 'Tangled'},
 {'cast': ['Nikita Mikhalkov',
   'Oleg Menshikov',
   'Nadezhda Mikhalkova',
   'Sergey Makovetskiy'],
  'title': 'Utomlyonnye solntsem 2: Predstoyanie'},
 {'title': 'Dinner for Schmucks',
  'cast': ['Steve Carell',
   'Paul Rudd',
   'Zach Galifianakis',
   'Jemaine Clement']}]

### 2.4


Retrieve the top 15 movies produced in 2010 that have the longest duration. Exclude TV series from your results. The returned documents should only contain the `title` and `runtime` fields (exclude the `_id` field).

> **Note:** It's ok if your results contain duplicate movies.

In [27]:
# The 15 longest movies (TV series excluded) from 2010: sort by runtime in
# descending order, then keep only the first 15 results.
longest_2010_cursor = (
    client.sample_mflix.movies
    .find({'year': 2010, 'type': 'movie'}, {'title': 1, 'runtime': 1, '_id': 0})
    .sort([('runtime', -1)])
    .limit(15)
)
list(longest_2010_cursor)

[{'runtime': 272, 'title': 'Mysteries of Lisbon'},
 {'runtime': 181, 'title': 'Utomlyonnye solntsem 2: Predstoyanie'},
 {'runtime': 181, 'title': 'Aurora'},
 {'runtime': 180, 'title': 'Thorne: Sleepyhead'},
 {'runtime': 180, 'title': 'The Autobiography of Nicolae Ceausescu'},
 {'runtime': 178, 'title': 'Riverworld'},
 {'runtime': 174, 'title': 'Enthiran'},
 {'runtime': 170, 'title': 'Khaleja'},
 {'runtime': 170, 'title': 'We Believed'},
 {'title': 'My Name Is Khan', 'runtime': 165},
 {'runtime': 163, 'title': 'Moss'},
 {'runtime': 163, 'title': 'Raajneeti'},
 {'runtime': 160, 'title': 'Singam'},
 {'runtime': 159, 'title': 'Black Venus'},
 {'runtime': 157, 'title': 'Will You Cross the Skies for Me?'}]

### 2.5


For year 2015, return the number of movies with a metacritic rating of exactly 90.

In [30]:
# Count the 2015 documents whose metacritic score is exactly 90.
metacritic_90_in_2015 = {'metacritic': 90, 'year': 2015}
client.sample_mflix.movies.count_documents(metacritic_90_in_2015)

3

### 2.6


Retrieve the title and runtime of the 10 shortest movies in the `movies` collection.


In [33]:
# The ten shortest titles. Documents without a `runtime` field are filtered
# out first so that they cannot take the top spots of the ascending sort.
has_runtime = {'runtime': {'$exists': True}}
title_and_runtime = {'title': 1, 'runtime': 1, '_id': 0}
shortest_cursor = (
    client.sample_mflix.movies
    .find(has_runtime, title_and_runtime)
    .sort([('runtime', 1)])
    .limit(10)
)
list(shortest_cursor)


[{'runtime': 1, 'title': 'Neko no shukai'},
 {'runtime': 1, 'title': 'The Kiss'},
 {'runtime': 1, 'title': 'The Kiss'},
 {'runtime': 2, 'title': 'Fresh Guacamole'},
 {'runtime': 2, 'title': 'Pixels'},
 {'runtime': 2, 'title': 'Game Over'},
 {'runtime': 2, 'title': 'Andrè and Wally B.'},
 {'runtime': 2, 'title': 'Luxo Jr.'},
 {'runtime': 3, 'title': 'Sisyphus'},
 {'runtime': 3, 'title': 'Gagarin'}]

## Exercise 3: Conditionals, embedded documents & arrays
---

### 3.1


Retrieve the title, production year, and number of awards of all movies that

- have been produced between 1950 and 2000 (inclusive)
- have an IMDB rating of 8.5 or better
- won at least 30 awards.

Sort the results by production year in descending order.

In [37]:
# Titles produced 1950-2000 (inclusive) with an IMDB rating of at least 8.5
# and at least 30 award wins, newest first. Dot notation reaches into the
# embedded `imdb` and `awards` documents.
acclaimed_classics = {
    'year': {'$gte': 1950, '$lte': 2000},
    'imdb.rating': {'$gte': 8.5},
    'awards.wins': {'$gte': 30},
}
title_year_wins = {'title': 1, 'year': 1, 'awards.wins': 1, '_id': 0}
acclaimed_cursor = client.sample_mflix.movies.find(acclaimed_classics, title_year_wins)
list(acclaimed_cursor.sort([('year', -1)]))

[{'year': 2000, 'title': 'Memento', 'awards': {'wins': 54}},
 {'year': 2000, 'title': 'Gladiator', 'awards': {'wins': 63}},
 {'year': 1999, 'title': 'The Matrix', 'awards': {'wins': 37}},
 {'year': 1998, 'title': 'Saving Private Ryan', 'awards': {'wins': 83}},
 {'title': 'Life Is Beautiful', 'awards': {'wins': 66}, 'year': 1997},
 {'year': 1997, 'title': 'Life Is Beautiful', 'awards': {'wins': 66}},
 {'year': 1995, 'title': 'The Usual Suspects', 'awards': {'wins': 36}},
 {'year': 1995, 'title': 'Se7en', 'awards': {'wins': 32}},
 {'year': 1994, 'title': 'Pulp Fiction', 'awards': {'wins': 64}},
 {'year': 1994, 'title': 'Forrest Gump', 'awards': {'wins': 46}},
 {'title': "Schindler's List", 'awards': {'wins': 81}, 'year': 1993},
 {'year': 1991, 'title': 'The Silence of the Lambs', 'awards': {'wins': 56}},
 {'year': 1990, 'title': 'Goodfellas', 'awards': {'wins': 43}},
 {'year': 1981, 'title': 'Raiders of the Lost Ark', 'awards': {'wins': 32}},
 {'year': 1977,
  'title': 'Star Wars: Episod

### 3.2


Find the top 15 highest-rated movies according to IMDB for movies that have at least 100,000 votes. Your returned documents should only contain the `title`, `year`, and `imdb.rating` fields.


In [40]:
# The 15 best-rated movies on IMDB among those with at least 100,000 votes.
widely_voted_movies = {
    'imdb.votes': {'$gte': 100000, '$exists': True},
    'type': 'movie',
}
title_year_rating = {'title': 1, 'year': 1, 'imdb.rating': 1, '_id': 0}
top_rated_cursor = (
    client.sample_mflix.movies
    .find(widely_voted_movies, title_year_rating)
    .sort([('imdb.rating', -1)])
    .limit(15)
)
list(top_rated_cursor)

[{'imdb': {'rating': 9.3}, 'year': 1994, 'title': 'The Shawshank Redemption'},
 {'imdb': {'rating': 9.3}, 'year': 1994, 'title': 'The Shawshank Redemption'},
 {'imdb': {'rating': 9.2}, 'year': 1972, 'title': 'The Godfather'},
 {'imdb': {'rating': 9.1}, 'year': 1974, 'title': 'The Godfather: Part II'},
 {'imdb': {'rating': 9.0}, 'year': 2008, 'title': 'The Dark Knight'},
 {'imdb': {'rating': 8.9},
  'year': 2003,
  'title': 'The Lord of the Rings: The Return of the King'},
 {'imdb': {'rating': 8.9}, 'year': 1999, 'title': 'Fight Club'},
 {'imdb': {'rating': 8.9}, 'year': 1994, 'title': 'Pulp Fiction'},
 {'title': "Schindler's List", 'year': 1993, 'imdb': {'rating': 8.9}},
 {'imdb': {'rating': 8.8},
  'year': 1980,
  'title': 'Star Wars: Episode V - The Empire Strikes Back'},
 {'imdb': {'rating': 8.8},
  'year': 2001,
  'title': 'The Lord of the Rings: The Fellowship of the Ring'},
 {'imdb': {'rating': 8.8}, 'year': 2010, 'title': 'Inception'},
 {'imdb': {'rating': 8.8}, 'year': 1994, 't

### 3.3


Retrieve the title, production year, and IMDB rating of movies in which both **Morgan Freeman** and **Clint Eastwood** played a role (among other actors in those movies). Sort the returned documents by year in descending order.

In [43]:
# Movies whose `cast` array contains both actors (other cast members are
# allowed), newest first. `$all` matches arrays that contain every listed
# value, which is the same condition as AND-ing one equality clause per
# actor, and it mirrors how the `languages` array is queried in exercise 3.4.
list(
    client['sample_mflix']['movies'].find(
        filter={'cast': {'$all': ['Morgan Freeman', 'Clint Eastwood']}},
        projection={'title': 1, 'year': 1, 'imdb.rating': 1, '_id': 0}
    ).sort('year', -1)
)

[{'imdb': {'rating': 8.1}, 'year': 2004, 'title': 'Million Dollar Baby'},
 {'imdb': {'rating': 8.3}, 'year': 1992, 'title': 'Unforgiven'}]

### 3.4


Retrieve documents associated with movies which:

- are available in both German and French (among other languages), but not in English,
- are either rated above 8 according to IMDB, or above 7.5 according to the critic ratings of [Rotten Tomatoes](https://www.rottentomatoes.com/) (inspect the `tomatoes` field),
- have at least 50 Rotten Tomatoes critic reviews.

The returned documents should include the title, year, IMDB rating, Rotten Tomatoes critic rating and country of production fields. Sort the results by IMDB rating in descending order.

> **Note:** Duplicates in the results are ok.

In [46]:
# Well-reviewed titles available in German and French but not in English:
#   - at least 50 Rotten Tomatoes critic reviews,
#   - IMDB rating above 8 OR Rotten Tomatoes critic rating above 7.5,
#   - `languages` contains both German and French and does not contain English.
# Results are ordered by IMDB rating, best first.
multilingual_acclaimed = {
    'tomatoes.critic.numReviews': {'$gte': 50},
    '$or': [
        {'imdb.rating': {'$gt': 8}},
        {'tomatoes.critic.rating': {'$gt': 7.5}},
    ],
    'languages': {'$all': ['German', 'French'], '$nin': ['English']},
}
requested_fields = {
    'title': 1,
    'year': 1,
    'imdb.rating': 1,
    'tomatoes.critic.rating': 1,
    'countries': 1,
    '_id': 0,
}
multilingual_cursor = client.sample_mflix.movies.find(multilingual_acclaimed, requested_fields)
list(multilingual_cursor.sort([('imdb.rating', -1)]))

[{'imdb': {'rating': 7.8},
  'year': 2013,
  'title': 'The Wind Rises',
  'tomatoes': {'critic': {'rating': 7.9}},
  'countries': ['Japan']},
 {'title': 'The Wind Rises',
  'year': 2013,
  'imdb': {'rating': 7.8},
  'countries': ['Japan'],
  'tomatoes': {'critic': {'rating': 7.9}}}]

### 3.5


Find the title and production year of the top 20 award-winning movies which have **not** been produced in USA, Canada, UK, or Australia.

> **Note:** Duplicates are ok. Return 20 documents in any case.

In [49]:
# The 20 movies with the most award wins whose `countries` array contains
# none of USA, Canada, UK or Australia.
produced_elsewhere = {
    'countries': {'$nin': ['USA', 'Canada', 'UK', 'Australia']},
    'awards.wins': {'$exists': True},
    'type': 'movie',
}
title_and_year = {'title': 1, 'year': 1, '_id': 0}
most_awarded_cursor = (
    client.sample_mflix.movies
    .find(produced_elsewhere, title_and_year)
    .sort([('awards.wins', -1)])
    .limit(20)
)
list(most_awarded_cursor)

[{'title': 'The Artist', 'year': 2011},
 {'title': 'Amour', 'year': 2012},
 {'title': 'Amour', 'year': 2012},
 {'title': 'A Separation', 'year': 2011},
 {'year': 2006, 'title': 'The Lives of Others'},
 {'title': 'Let the Right One In', 'year': 2008},
 {'year': 2002, 'title': 'City of God'},
 {'year': 2006, 'title': 'Volver'},
 {'title': 'Life Is Beautiful', 'year': 1997},
 {'title': 'The Sea Inside', 'year': 2004},
 {'year': 1997, 'title': 'Life Is Beautiful'},
 {'year': 2009, 'title': 'The White Ribbon'},
 {'year': 2001, 'title': 'Amèlie'},
 {'year': 2000, 'title': 'Amores Perros'},
 {'title': 'Shall We Dance?', 'year': 1996},
 {'title': 'Shall We Dance?', 'year': 1996},
 {'year': 2013, 'title': 'The Grandmaster'},
 {'year': 1999, 'title': 'All About My Mother'},
 {'year': 2009, 'title': 'The Secret in Their Eyes'},
 {'year': 2001, 'title': 'Spirited Away'}]

<img src="thanks.png" width="500">
