In [1]:
import pymongo
import pprint
import dateparser
from bson.son import SON

# Connection string for the course's three-member replica set.
# NOTE(review): the username/password are embedded in the URI; this looks like
# a shared course account, but confirm before reusing this pattern elsewhere.
course_cluster_uri = (
    "mongodb://agg-student:agg-password@"
    "cluster0-shard-00-00-jxeqq.mongodb.net:27017,"
    "cluster0-shard-00-01-jxeqq.mongodb.net:27017,"
    "cluster0-shard-00-02-jxeqq.mongodb.net:27017"
    "/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin"
)
course_client = pymongo.MongoClient(course_cluster_uri)

# Handle to the `movies` collection in the `aggregations` database.
movies = course_client.aggregations.movies

## Lab : $graphLookup

For this lab, you'll be calculating the [degrees of separation](https://en.wikipedia.org/wiki/Six_degrees_of_separation) of directors to "Steven Spielberg".

This is a bit like calculating a ["Kevin Bacon" number](https://en.wikipedia.org/wiki/Six_Degrees_of_Kevin_Bacon), but instead of all connections you will only consider connections through the `directors` graph nodes.

Complete the `$graphLookup` and `$project` stages by correctly constructing the `graph_lookup` and `project_cast` variables below.

To optimize the execution of `$graphLookup` stage, use a `maxDepth` of 6.

For the solution, only provide the numeric portion of the returned output to the validator.

**HINT**: `$reduce` is a powerful expression!

In [2]:
# Fetch a single document from the collection to inspect which fields
# (e.g. `cast`, `directors`) a movie document carries.
movies.find_one()

{'_id': ObjectId('573a1390f29313caabcd4192'),
 'title': 'The Conjuring of a Woman at the House of Robert Houdin',
 'year': 1896,
 'runtime': 1,
 'cast': ["Jeanne d'Alcy", 'Georges Méliès'],
 'plot': 'A woman disappears on stage.',
 'fullplot': 'An elegantly dressed man enters through a stage door onto a set with decorated back screen, a chair and small table. He brings a well-dressed women through the door, spreads a newspaper on the floor, and places the chair on it. She sits and fans herself; he covers her with a diaphanous cloth. She disappears; he tries to conjure her back with incomplete results. Can he go beyond the bare bones of a conjuring trick and succeed in the complete reconstitution of a the lady?',
 'lastupdated': '2015-08-26 00:05:55.493000000',
 'type': 'movie',
 'directors': ['Georges Méliès'],
 'imdb': {'rating': 6.3, 'votes': 759, 'id': 75},
 'countries': ['France'],
 'genres': ['Short'],
 'tomatoes': {'viewer': {'rating': 3.7, 'numReviews': 59},
  'lastUpdated': dat

In [4]:
# $graphLookup stage: walk the director-to-director graph.
# Starting from the incoming document's `directors` values, repeatedly look up
# documents in the "movies" collection whose `directors` field matches, and
# collect every document reached into the `network` array.
_director_graph_spec = {
    "from": "movies",
    "startWith": "$directors",
    "connectFromField": "directors",
    "connectToField": "directors",
    "as": "network",
    # The lab asks for the recursion to be capped at a depth of 6.
    "maxDepth": 6,
    # Each document in `network` records the depth at which it was reached.
    "depthField": "network_level",
    # Empty filter: no additional restriction on the documents traversed.
    "restrictSearchWithMatch": {},
}

graph_lookup = {"$graphLookup": _director_graph_spec}

# $project stage: flatten `cast`.
# At this point in the pipeline `cast` holds one cast array per movie; fold
# them into a single flat array by appending each inner array ($$this) to the
# running accumulator ($$value), starting from an empty array.
_flattened_cast = {
    "$reduce": {
        "input": "$cast",
        "initialValue": [],
        "in": {"$concatArrays": ["$$value", "$$this"]},
    }
}

project_cast = {"$project": {"cast": _flattened_cast}}


# Pipeline: find the smallest director-network depth, starting from films
# directed by "Steven Spielberg", at which "Woody Harrelson" appears in a cast.
spielberg_pipeline = [
    # Start from movies that list "Steven Spielberg" among their directors.
    {"$match": {"directors": "Steven Spielberg"}},

    # Carry only `directors` forward; `_id` is not omitted, so it passes through too.
    {"$project": {"directors": 1}},

    # Expand the director-to-director network (see `graph_lookup`).
    graph_lookup,

    # Emit one document per movie reached in `network`.
    {"$unwind": "$network"},

    # Keep each reached movie's cast and the depth at which it was reached.
    {"$project": {"cast": "$network.cast", "level": "$network.network_level"}},

    # Per depth, collect the distinct cast arrays (still an array of arrays).
    {"$group": {"_id": "$level", "cast": {"$addToSet": "$cast"}}},

    # Flatten each depth's cast arrays into one array (see `project_cast`).
    project_cast,

    # Keep only the depths whose combined cast includes "Woody Harrelson".
    {"$match": {"cast": "Woody Harrelson"}},

    # Order by `_id`, i.e. by depth, ascending.
    {"$sort": {"_id": 1}},

    # Report the depth as `answer` and drop `_id`.
    {"$project": {"_id": 0, "answer": "$_id"}},

    # Only the first (minimum-depth) document is needed.
    {"$limit": 1},
]

results = movies.aggregate(spielberg_pipeline)

list(results)[0]

{'answer': 2}