### MongoDB

In [4]:
#import libraries
import pymongo
import json
from pymongo import MongoClient

In [5]:
# Connect to the local database server (MongoClient() is called with no
# arguments, so PyMongo's defaults are used).
client = MongoClient()

# Select the "sakila" database used by the query cells.
db = client["sakila"]

In [6]:
# function to print only first n documents (to avoid perf/memory issues)
def printhead(cursor, n):
    """Print at most the first ``n`` documents yielded by ``cursor``.

    Parameters
    ----------
    cursor : iterable
        Any iterable of documents (e.g. a PyMongo cursor).
    n : int
        Maximum number of documents to print. Values <= 0 print nothing.
    """
    for idx, document in enumerate(cursor):
        # idx is zero-based, so stop as soon as n documents have been printed
        # (the previous `idx <= n` test printed n + 1 documents).
        if idx >= n:
            break
        print(document)

1. List total number of customers living in California

In [None]:
# Count customers whose District is "California".
# Cursor.count() was removed in PyMongo 4; count_documents() is the supported API.
db.customers.count_documents({"District": "California"})

2. List all movies that are rated NC-17

In [None]:
# Materialise the cursor so the cell displays the matching films rather than
# the repr of a lazy Cursor object.
list(db.films.find({"Rating": "NC-17"}))

3. List the count of movies by category

In [None]:
# Group films by Category and count the documents in each group.
# Operator and field names must be quoted strings in Python (the unquoted
# mongo-shell form is a SyntaxError here).
films_by_category_pipeline = [
    {"$group": {"_id": "$Category", "count": {"$sum": 1}}},
]
list(db.films.aggregate(films_by_category_pipeline))

4. Find the top 2 movies with movie length greater than 25mins OR which has commentaries as a special feature

In [None]:
# Top 2 films (by Length, descending) among those that are longer than 25
# minutes OR list "Commentaries" as a special feature.
# The filter must run BEFORE $sort/$limit; otherwise only the two longest
# films overall are kept and the $match merely filters those two.
# NOTE(review): assumes Length is stored as a number and that
# 'Special Features' equals or contains the exact value "Commentaries" — confirm
# against the collection's schema.
top_films_pipeline = [
    {"$match": {"$or": [
        {"Special Features": "Commentaries"},
        {"Length": {"$gt": 25}},
    ]}},
    {"$sort": {"Length": -1}},
    {"$limit": 2},
]
list(db.films.aggregate(top_films_pipeline))

5. Find the top 10 customers based on number of rentals

In [None]:
# Top 10 customers by number of rentals: emit one document per rental,
# count them per customer _id, then keep the 10 largest counts.
# PyMongo's aggregate() takes the stages as ONE list (not as separate
# positional arguments), and every operator/key must be a quoted string.
top_renters_pipeline = [
    {"$unwind": "$Rentals"},
    {"$group": {"_id": "$_id", "ct": {"$sum": 1}}},
    {"$sort": {"ct": -1}},
    {"$limit": 10},
]
list(db.customers.aggregate(top_renters_pipeline))

6. Provide 5 additional queries and indicate the specific business use cases they address
 - Note: Insights should not be a flavor of the previously addressed queries within Assignment 4.

* 6.1 Retrieve all movies from the films collection whose rating equals either "G" or "PG-13"

In [None]:
# Films whose Rating is either "G" or "PG-13".
# Keys and operators are quoted so the filter is a valid Python dict.
list(db.films.find({"Rating": {"$in": ["G", "PG-13"]}}))

* 6.2 Retrieve all movies in the films collection where the category is "Family" and either the length is greater than 100 or the Rating starts with the character P

In [None]:
# Family films that are either longer than 100 or whose Rating starts with "P".
# - Field is "Length" (capitalised), matching the other queries on this
#   collection; field names are case-sensitive.
# - The shell regex literal /^P/ is not Python; use the $regex operator.
family_filter = {
    "Category": "Family",
    "$or": [
        {"Length": {"$gt": 100}},
        {"Rating": {"$regex": "^P"}},
    ],
}
list(db.films.find(family_filter))

* 6.3 Retrieve all films whose Actors array contains the actor Lucille Tracy

In [None]:
# Films whose Actors array has an element for Lucille Tracy.
# $elemMatch requires one array element to satisfy all three conditions but,
# unlike whole-document equality, does not depend on the stored field order
# or on the element having no additional fields.
lucille_tracy = {
    "First name": "LUCILLE",
    "Last name": "TRACY",
    "actorId": 20,
}
list(db.films.find({"Actors": {"$elemMatch": lucille_tracy}}))

* 6.4 Retrieve all stores where the Inventory array contains a filmId whose value is less than or equal to 5

In [None]:
# Stores with at least one Inventory entry whose filmId is <= 5.
# The $lte operator must be a quoted string in Python.
list(db.stores.find({"Inventory.filmId": {"$lte": 5}}))

* 6.5 Retrieve films where at least one actorId nested in the Actors array is greater than 1 and less than or equal to 10

In [None]:
# Films where a SINGLE Actors element has 1 < actorId <= 10.
# Without $elemMatch, a dotted-path filter on an array lets different
# elements satisfy each bound (e.g. one actorId > 1 and another <= 10),
# which matches far more films than intended.
actor_id_range = {"actorId": {"$gt": 1, "$lte": 10}}
list(db.films.find({"Actors": {"$elemMatch": actor_id_range}}))

### Neo4j

1. Find all Producers that produced the movie When Harry Met Sally

In [None]:
// Everyone with a PRODUCED relationship to "When Harry Met Sally";
// one row per producer, with the movie title repeated alongside.
MATCH (film:Movie {title: 'When Harry Met Sally'})<-[:PRODUCED]-(person:Person)
RETURN person.name AS producer,
       film.title  AS movie

2. Find directors who have directed more than 2 movies

In [None]:
// Directors with more than two DIRECTED relationships, together with the
// titles of the movies they directed.
MATCH (film:Movie)<-[:DIRECTED]-(d:Person)
WITH d,
     collect(film.title) AS movies,
     count(film)         AS directedCount
WHERE directedCount > 2
RETURN d.name, movies

3. Find the actors with 5+ movies, and the movies in which they acted

In [None]:
// Actors with five or more movies ("5+"), and the titles they acted in.
// The threshold is inclusive: `> 5` would silently drop actors with
// exactly five movies.
MATCH (a:Person)-[:ACTED_IN]->(m:Movie)
WITH a, count(m) AS numMovies, collect(m.title) AS movies
WHERE numMovies >= 5
RETURN a.name, movies

4. Movies and actors exactly 3 "hops" away from the movie Hoffa

In [None]:
// People reachable from the movie "Hoffa" over exactly three relationships
// (any type, any direction), paired with every movie they acted in.
// Kept as a single MATCH so relationship-uniqueness applies to the whole path.
MATCH (origin:Movie)-[*3]-(actor:Person)-[:ACTED_IN]->(film:Movie)
WHERE origin.title = "Hoffa"
RETURN DISTINCT actor.name AS name,
                film.title AS title

5. Find all actors who have also directed movies and the movies that they
directed

In [None]:
// Actors who have also directed, listed against each movie they directed.
// A person qualifies if they have ACTED_IN any movie, not only the one they
// directed. The previous predicate required acting in that same movie, and
// it also built an `actors` list that was never returned.
MATCH (p:Person)-[:DIRECTED]->(m:Movie)
WHERE (p)-[:ACTED_IN]->(:Movie)
WITH m, collect(p.name) AS directors
RETURN directors AS `Actor/Director(s)`, m.title AS Movie

6. Provide 5 additional queries and indicate the specific business use cases they
address
 - Note: Insights should not be a flavor of the previously addressed queries within
Assignment 4.

* 6.1 Retrieve all Movie nodes.

In [None]:
// Every node labelled Movie, returned whole under the column name `m`.
MATCH (movie:Movie)
RETURN movie AS m

* 6.2 Retrieve all movies that Tom Hanks has acted in and the co-actors that acted in the same movie, returning the movie title and the list of co-actors that Tom Hanks worked with.

In [None]:
// For each movie Tom Hanks acted in, collect the names of the other people
// with an ACTED_IN relationship to that same movie.
MATCH (hanks:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(film:Movie)<-[:ACTED_IN]-(coActor:Person)
RETURN film.title            AS movie,
       collect(coActor.name) AS `co-actors`

* 6.3 Retrieve all people who reviewed a movie, returning the list of reviewers and how many reviewers reviewed the movie.

In [None]:
// Per movie title: how many people reviewed it and who they were.
MATCH (film:Movie)<-[:REVIEWED]-(reviewer:Person)
RETURN film.title             AS movie,
       count(reviewer)        AS numReviews,
       collect(reviewer.name) AS reviewers

* 6.4 Retrieve all movies that Tom Cruise acted in, returning the title of the movie, the year the movie was released, the number of years ago that the movie was released, and the age of Tom when the movie was released.

In [None]:
// Tom Cruise's movies with release year, years elapsed since release
// (relative to the current year), and his age at release (released - born).
// Rows are ordered from most recent to oldest release.
MATCH (cruise:Person {name: 'Tom Cruise'})-[:ACTED_IN]->(m:Movie)
RETURN m.title,
       m.released,
       date().year - m.released AS yearsAgoReleased,
       m.released - cruise.born AS `age of Tom`
ORDER BY yearsAgoReleased

* 6.5 Retrieve all actors that acted in movies, and also retrieve the producers of those movies. Make sure there is no duplication of data, and order the results by the size of the list of actors.

In [None]:
// For every movie that has both actors and producers: the de-duplicated
// actor names and producer names, ordered by ascending number of actors.
MATCH (actor:Person)-[:ACTED_IN]->(m:Movie)<-[:PRODUCED]-(producer:Person)
WITH m,
     collect(DISTINCT actor.name)    AS actors,
     collect(DISTINCT producer.name) AS producers
RETURN DISTINCT m.title, actors, producers
ORDER BY size(actors)