In [1]:
from functions import connections, initialize

2025-01-28 20:46:45,206 - functions.connections - INFO - Connecting to Neo4j at bolt://localhost:7687 with user neo4j


True


In [2]:
NEO4J_DRIVER = connections.NEO4J_DRIVER
# verify connection
if NEO4J_DRIVER is None:
    print("Error connecting to Neo4j")

In [3]:
initialize.start()

2025-01-28 20:46:45,275 - functions.initialize - INFO - Initializing the application
2025-01-28 20:46:45,280 - functions.initialize - INFO - Loading pre-created embeddings
2025-01-28 20:46:45,332 - functions.data_preprocess - INFO - Embeddings already loaded and indexes created.
2025-01-28 20:46:45,371 - functions.initialize - INFO - Dropping missing data
2025-01-28 20:46:45,373 - functions.data_preprocess - DEBUG - Running query: plot_drop
2025-01-28 20:46:46,019 - functions.data_preprocess - DEBUG - Running query: bio_drop
2025-01-28 20:46:47,019 - functions.data_preprocess - DEBUG - Running query: poster_drop
2025-01-28 20:46:47,230 - functions.data_preprocess - DEBUG - Running query: year_drop
2025-01-28 20:46:47,249 - functions.data_preprocess - DEBUG - Running query: imdbRating_drop
2025-01-28 20:46:47,261 - functions.data_preprocess - INFO - Nodes with missing or incorrect embeddings dropped successfully.


Schema of the Database:

![schema](imgs/graph.png)

In [4]:
def run_cypher(query, parameters):
    with NEO4J_DRIVER.session() as session:
        result = session.run(query, parameters)
        data = result.data()
        return data

## Getting all Movies

In [5]:
def get_all_nodes_id():
    query = "MATCH (mov:Movie) RETURN elementid(mov) as id"
    result = run_cypher(query, {})
    return [record["id"] for record in result]

movie_ids = get_all_nodes_id()
print(movie_ids[-1])

4:31cc548a-e875-40d2-9ae3-7884192f4683:9143


## Neo4j Movie Recommendations

### Naive Movie Similarity (Basic Graph Traversal)

In [6]:
def similar_movies_genre(id):
    """
    Finding Movies in the same genre as the given movie, no ranking methods used
    """
    query = """
    MATCH (m:Movie)-[:IN_GENRE]->(g:Genre)<-[:IN_GENRE]-(rec:Movie)
    WHERE elementId(m) = $id
    WITH rec
    RETURN rec.title AS recommendation
    LIMIT 10
    """
    result = run_cypher(query, {"id": id})
    for record in result:
        print(record["recommendation"])

similar_movies_genre(movie_ids[-1])

Two Night Stand
Stretch
Boxtrolls, The
This Is Where I Leave You
Tusk
St. Vincent
Rewrite, The
Big Hero 6
What We Do in the Shadows
Let's Be Cops


### Using Plot Embeddings (OpenAI text-embedding-ada-002)

In [7]:
def drop_no_plots():
    query = """
    MATCH (m:Movie)
    WHERE m.plot IS NULL
    DETACH DELETE m
    """
    run_cypher(query, {})

In [8]:
def movie_node_proj_graph():
    """
    Drops existing projection graph if exists, and creates a new graph with Movie nodes and plotEmbedding property
    """
    drop_query = """
    CALL gds.graph.drop('movieGenreGraph', false) YIELD graphName;
    """
    run_cypher(drop_query, {})
    # query = """
    # CALL gds.graph.project(
    # 'movieGenreGraph',              
    # {
    #   Movie: {
    #     properties: ['plotEmbedding']
    #   },
    #   Genre: {}
    #   },               
    #   {
    #   IN_GENRE: {
    #     type: 'IN_GENRE',
    #     orientation: 'NATURAL'
    #   }
    # }
    # )
    # """
    # result = run_cypher(query, {})
    # return result

In [9]:
movie_node_projection = movie_node_proj_graph()
print(movie_node_projection)

None


# Collaborative Filtering Methods

In [10]:
def movie_user_recommendations_singular(id, rating):
    """
    id: movie node id
    Idea is, if a user has rated a movie highly (5.0), then find similar users who have rated the same movie highly, and recommend movies that they have rated highly
    If a user has rated a movie poorly (0.5), then find similar users who have rated the same movie poorly, and recommend movies that they have rated highly
    """
    # create a graph projection of Movie and User nodes, and Rated relationships
    # For the rated relationships, we will use the rating as the weight
    # The rating can be accessed by the rating property of the Rated relationship
    # drop_query = """
    # CALL gds.graph.drop('movieUserGraph', false) YIELD graphName;
    # """
    # run_cypher(drop_query, {})
    # query = """
    # CALL gds.graph.project(
    # 'movieUserGraph',              
    # {
    #   Movie: {
    #     properties: ['title']
    #   },
    #   User: {
    #     properties: ['userId']
    #   }
    #   },               
    #   {
    #   RATED: {
    #     type: 'RATED',
    #     orientation: 'NATURAL',
    #     properties: 'rating'
    #   }
    # }
    # )
    # """
    # result = run_cypher(query, {})
    # Find all the users who have rated the movie, and the rating they gave
    # Out of the users who have rated the movie, find the users who have rated the movie similarly to the given rating
    # Find the movies that these users have rated highly in the same genre
    query = """
    MATCH (m:Movie)
    WHERE elementId(m) = $id
    WITH m
    MATCH (m)<-[r:RATED]-(u:User)
    WHERE r.rating = $rating
    WITH u
    MATCH (u)-[r:RATED]->(rec:Movie)
    WHERE r.rating = 5.0
    WITH rec, COLLECT(elementId(u)) AS users
    RETURN DISTINCT elementId(rec) AS rec_id, rec AS recommendation, users
    ORDER BY rec.imdbVotes DESC
    LIMIT 20
    """
    result = run_cypher(query, {'id': id, 'rating': rating})
    return result

movie_user_recommendations = movie_user_recommendations_singular('4:31cc548a-e875-40d2-9ae3-7884192f4683:12', 5.0)
for record in movie_user_recommendations:
    print(record["recommendation"]["title"])

Shawshank Redemption, The
Fight Club
Pulp Fiction
Lord of the Rings: The Fellowship of the Ring, The
Forrest Gump
Matrix, The
Lord of the Rings: The Return of the King, The
Godfather, The
Lord of the Rings: The Two Towers, The
Seven (a.k.a. Se7en)
Gladiator
Star Wars: Episode IV - A New Hope
Silence of the Lambs, The
Saving Private Ryan
Schindler's List
Memento
Star Wars: Episode V - The Empire Strikes Back
American Beauty
Titanic
Godfather: Part II, The


# Content Filtering Methods

In [11]:
def plot_embedding_similarity_genre(id):
    """
    Leveraging plot embeddings to find similar movies in the same genre, ranking using cosine similarity score
    Uses the moviePlots index to find similar movies
    """
    query = """
    MATCH (source:Movie)
    WHERE elementId(source) = $id
    WITH source, source.plotEmbedding AS sourceVec
    MATCH (target:Movie)-[:IN_GENRE]->(g:Genre)<-[:IN_GENRE]-(source)
    WHERE target.plotEmbedding IS NOT NULL
    WITH source, target, g, gds.similarity.cosine(sourceVec, target.plotEmbedding) AS similarity
    RETURN DISTINCT elementId(source) as source_id, source, elementId(target) as target_id, target, similarity
    ORDER BY similarity DESC
    LIMIT 20
    """
    result = run_cypher(query, {'id': id})
    return result

similar_movies = plot_embedding_similarity_genre('4:31cc548a-e875-40d2-9ae3-7884192f4683:12')
for record in similar_movies:
    # print(record["target_id"], record["target"]["title"], record["similarity"])
    print(record.keys())

dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'similarity'])
dict_keys(['source_id', 'source', 'target_id', 'target', 'simila

In [12]:
def movie_user_recommendations_count_singular(id, rating):
    query = """
    MATCH (m:Movie)
    WHERE elementId(m) = $id
    WITH m
    MATCH (m)<-[r:RATED]-(u:User)  
    WITH u, m
    MATCH (u)-[r:RATED]->(rec:Movie)  
    WHERE elementID(m) <> elementID(rec)
    WITH rec, COUNT(u) AS user_count
    RETURN elementId(rec) AS rec_id, rec AS recommendation, user_count
    ORDER BY user_count DESC
    LIMIT 20
    """
    result = run_cypher(query, {'id': id, 'rating': rating})
    return result

movie_user_recommendations = movie_user_recommendations_count_singular('4:31cc548a-e875-40d2-9ae3-7884192f4683:12', 5.0)
for record in movie_user_recommendations:
    print(record["recommendation"]["title"])

Independence Day (a.k.a. ID4)
Pulp Fiction
Fargo
Twelve Monkeys (a.k.a. 12 Monkeys)
Star Wars: Episode IV - A New Hope
Forrest Gump
Jurassic Park
Silence of the Lambs, The
Twister
Toy Story
Mission: Impossible
Seven (a.k.a. Se7en)
Rock, The
Shawshank Redemption, The
Fugitive, The
Terminator 2: Judgment Day
Braveheart
Usual Suspects, The
True Lies
Apollo 13


In [13]:
def movie_similarity_jaccard(id_list):
    """
    This function uses jaccard similarity to find similar movies based on the genres, actors, and directors of the movies
    Greater the interseciton between the sets is the better the similarity
    param id_list: list of movie node ids (must be at least 2 elements in list to create a similarity)
    """
    if id_list is None or len(id_list) < 2:
        print("Please provide at least 2 movie ids")
        return []
    query = """
    MATCH (m1:Movie)-[:IN_GENRE|ACTED_IN|DIRECTED]->(x)<-[:IN_GENRE|ACTED_IN|DIRECTED]-(m2:Movie)
    WHERE elementId(m1) IN $id_list AND m1 <> m2
    RETURN elementId(m2) AS id, properties(m2) as movie, COUNT(x) AS score
    ORDER BY score DESC
    LIMIT 10
    """
    
    results = run_cypher(query, {'id_list': id_list})
    return results

movie_similarity = movie_similarity_jaccard(['4:31cc548a-e875-40d2-9ae3-7884192f4683:0', '4:31cc548a-e875-40d2-9ae3-7884192f4683:19'])

for record in movie_similarity:
    print(record["movie"]["title"], record["score"])

TMNT (Teenage Mutant Ninja Turtles) 7
The Lego Movie 7
Gnomeo & Juliet 6
Inside Out 6
One Piece Film: Strong World 6
Turbo 6
Zootopia 6
Boxtrolls, The 6
The Good Dinosaur 6
Home 6


In [14]:
movie_id_list = [0,5,7,8,11,12,15,17,18,19,20,21,22,24,25,26,27,28,29,30,31,32,34,35,36]

### Personalized PageRank doesn't work, graph is too sparse

In [15]:
def page_rank_recommendations(id_list):
    """
    Annoyingly, ids cannot be elementId, must be the old id
    Runs personalized pagerank on the movie graphs using the given movie ids as seed nodes
    Returns list of recommended movies
    param id_list: list of movie node ids
    """
    if id_list is None or len(id_list) < 1:
        print("Please provide at least 1 movie id")
        return []
    drop_query = """
    CALL gds.graph.drop('movieGraph', false) YIELD graphName;
    """
    run_cypher(drop_query, {})
    projection_graph_query = """
    CALL gds.graph.project(
        'movieGraph',
        ['Movie'],
        {
            ACTED_IN: {orientation: 'UNDIRECTED'},
            DIRECTED: {orientation: 'UNDIRECTED'},
            IN_GENRE: {orientation: 'UNDIRECTED'},
            RATED: {orientation: 'UNDIRECTED', properties: 'rating'}
        }
    )
    """
    run_cypher(projection_graph_query, {})
    
    query = """
    CALL gds.pageRank.stream('movieGraph', 
        {
            sourceNodes: $id_list,
            dampingFactor: 0.7
        }
    )
    YIELD nodeId, score
    WHERE score > 0 AND NOT ID(gds.util.asNode(nodeId)) IN $id_list
    RETURN elementId(gds.util.asNode(nodeId)) AS id, properties(gds.util.asNode(nodeId)) AS movie, score
    ORDER BY score DESC
    LIMIT 10;
    """
    
    results = run_cypher(query, {'id_list': id_list})
    run_cypher(drop_query, {})
    return results

page_rank_recommendations = page_rank_recommendations(movie_id_list)
for record in page_rank_recommendations:
    print(record["movie"]["title"], record["score"])
    



### Node Similarity/K-Nearest Neighbors on Poster Embeddings (for fun)

In [16]:
def knn_movie_recommendations(id_list):
    """
    Runs knn algorithm on the movie graphs using the given movie ids as source nodes
    Returns list of recommended movies
    param id_list: list of movie node ids
    """
    if id_list is None or len(id_list) < 1:
        print("Please provide at least 1 movie id")
        return []
    drop_query = """
    CALL gds.graph.drop('movieKNNGraph', false) YIELD graphName;
    """
    run_cypher(drop_query, {})
    projection_graph_query = """
    CALL gds.graph.project(
        'movieKNNGraph',
        ['Movie'],
        '*',
        {
            nodeProperties: ['posterEmbedding']
        }
    )
    """
    run_cypher(projection_graph_query, {})
    query = """
    CALL gds.knn.stream('movieKNNGraph', {
        nodeLabels: ['Movie'],
        nodeProperties: {
            posterEmbedding: 'COSINE'
        },
        topK: 10,
        randomSeed: 42,
        concurrency: 1
    }
    )
    YIELD node1, node2, similarity
    WHERE ID(gds.util.asNode(node1)) IN $id_list AND NOT ID(gds.util.asNode(node2)) IN $id_list
    RETURN elementId(gds.util.asNode(node2)) AS id, properties(gds.util.asNode(node2)) AS movie, similarity
    ORDER BY similarity DESC
    LIMIT 10;
    """
    
    results = run_cypher(query, {'id_list': id_list})
    run_cypher(drop_query, {})
    return results

knn_recommendations = knn_movie_recommendations(movie_id_list)
for record in knn_recommendations:
    print(record["movie"]["title"], record["similarity"])



Toy Story 2 0.9521079063415527
Toy Story 3 0.9397517442703247
Planet 51 0.9344040155410767
Big Trouble 0.9273042678833008
Dracula 2000 0.9267540574073792
Goldfinger 0.9191790223121643
Jimmy Neutron: Boy Genius 0.9181286692619324
Toy Story of Terror 0.9143679141998291
Shrek 0.913116455078125
Horror of Dracula (Dracula) 0.9123324751853943


### Graph Embeddings Node2Vec and GraphSage Implementation

In [33]:
def node_embeddings_movie_recommendations(id_list, embedding_option = 'node2vec'):
    """
    Runs node2vec algorithm or GraphSage on the movie graphs using the given movie ids as source nodes
    Returns list of recommended movies
    GraphSage to incorporate more node features and is more scalable, Node2Vec can only rely on structure (tradeoff)
    GraphSage for dymamic graphs, Node2Vec for static graphs
    param id_list: list of movie node ids
    embedding_option: 'GraphSage' or 'Node2Vec'. GraphSage is default
    """
    
    if embedding_option.lower() not in ['graphsage', 'node2vec']:
        raise ValueError("embedding_option must be 'GraphSage' or 'node2vec'")
    
    if id_list is None or len(id_list) < 1:
        print("Please provide at least 1 movie id")
        return []
    drop_query = """
    CALL gds.graph.exists('movieNode2VecGraph') YIELD exists
    WITH exists
    CALL apoc.do.when(
        exists,
        'CALL gds.graph.drop("movieNode2VecGraph") YIELD graphName RETURN graphName',
        'RETURN "Graph does not exist" AS graphName',
        {}
    ) YIELD value
    RETURN value.graphName;
    """
    run_cypher(drop_query, {})
    
    if embedding_option.lower() == 'node2vec':
        # Store the embeddings on Movie Nodes
        # check to see if embeddings are already stored
        
        check_embedding_query = """
        MATCH (m:Movie)
        WHERE m.node2vecMovieEmbedding IS NOT NULL
        RETURN COUNT(m) AS count
        """
        result = run_cypher(check_embedding_query, {})
        if result[0]['count'] > 0:
            print("Embeddings already exist")
        else:
            create_graph_query = """
            CALL gds.graph.project(
                'movieNode2VecGraph',
                ['Movie'],
                {
                    ACTED_IN: {orientation: 'UNDIRECTED'},
                    DIRECTED: {orientation: 'UNDIRECTED'},
                    IN_GENRE: {orientation: 'UNDIRECTED'},
                    RATED: {orientation: 'UNDIRECTED', properties: 'rating'}
                }
            )
            """
            run_cypher(create_graph_query, {})
            store_embedding_query = """
            CALL gds.node2vec.write('movieNode2VecGraph', {
                writeProperty: 'node2vecMovieEmbedding',
                embeddingDimension: 64,
                iterations: 5,
                walkLength: 100,
                inOutFactor: 3.0
                }
            )
            """
            run_cypher(store_embedding_query, {})
            run_cypher(drop_query, {})
        
        scale_embedding_query = """
        MATCH (m:Movie)
        WITH m, 
            reduce(s = 0.0, x IN m.node2vecMovieEmbedding | s + x^2) AS sumSquares
        SET m.normFactor = sqrt(sumSquares);
        """
        
        run_cypher(scale_embedding_query, {})
        
        scaler_calculation_query = """
        MATCH (m:Movie)
        WITH m, 
            reduce(s = 0.0, x IN m.node2vecMovieEmbedding | s + x^2) AS sumSquares
        SET m.normFactor = sqrt(sumSquares);
        """
        
        run_cypher(scaler_calculation_query, {})
        
        projection_graph_query = """
        CALL gds.graph.project(
            'movieNode2VecGraph',
            {
                Movie: {
                    properties: ['node2vecMovieEmbedding']
                }
            },
            {
                ACTED_IN: {orientation: 'UNDIRECTED'},
                DIRECTED: {orientation: 'UNDIRECTED'},
                IN_GENRE: {orientation: 'UNDIRECTED'},
                RATED: {orientation: 'UNDIRECTED', properties: 'rating'}
            }
        );
        """
        run_cypher(projection_graph_query, {})
        # query = """
        # CALL gds.node2vec.stream('movieNode2VecGraph', {
        #     embeddingDimension: 128,
        #     iterations: 5,
        #     walkLength: 100,
        #     inOutFactor: 2.0,
        # })
        # YIELD nodeId, embedding
        # RETURN gds.util.asNode(nodeId).title AS movie, embedding
        # """
        # results = run_cypher(query, {})
        
        
        
        
        # KNN for similar movies
        query = """
        CALL gds.knn.stream('movieNode2VecGraph', {
            nodeLabels: ['Movie'],
            nodeProperties: ['node2vecMovieEmbedding'],
            topK: 10
            })
        YIELD node1, node2, similarity
        WHERE ID(gds.util.asNode(node1)) IN $id_list AND NOT ID(gds.util.asNode(node2)) IN $id_list
        RETURN elementID(gds.util.asNode(node1)) AS source_id, gds.util.asNode(node1) as source, elementID(gds.util.asNode(node2)) AS target_id, gds.util.asNode(node2) as target, similarity
        ORDER BY similarity DESC
        LIMIT 10
        """
        results = run_cypher(query, {'id_list': id_list})
        
        # run_cypher(drop_query, {})
        return results
    elif embedding_option.lower() == 'graphsage':
        drop_graphsage_query = """
        CALL gds.graph.drop('movieGraphSageGraph', false) YIELD graphName;
        """
        run_cypher(drop_graphsage_query, {})
        drop_model_query = """
        CALL gds.beta.model.drop('movieGraphSageModel', false) YIELD modelInfo;
        """
        run_cypher(drop_model_query, {})
        
        check_embedding_query = """
        MATCH (m:Movie)
        WHERE m.graphsageMoviePlotEmbedding IS NOT NULL
        RETURN COUNT(m) AS count
        """
        check_result = run_cypher(check_embedding_query, {})
        if check_result[0]['count'] > 0:
            print("Embeddings already exist")
        else:
            graph_projection_query = """
            CALL gds.graph.project(
                'movieGraphSageGraph',
                {
                    Movie: {
                        properties: ['plotEmbedding', 'imdbRating', 'year']
                    },
                    Person: {},
                    Genre: {},
                    User: {}
                },
                {
                    ACTED_IN: {orientation: 'UNDIRECTED'},
                    DIRECTED: {orientation: 'UNDIRECTED'},
                    IN_GENRE: {orientation: 'UNDIRECTED'},
                    RATED: {orientation: 'UNDIRECTED', properties: 'rating'}
                }
            );
            """
            run_cypher(graph_projection_query, {})
        
            # Please note that the projectedFeatureDimension should be included to bypass the argument of properties that exist for each label are error
            train_graphsage_query = """
            CALL gds.beta.graphSage.train(
                'movieGraphSageGraph',
                {
                    modelName: 'movieGraphSageModel',
                    featureProperties: ['plotEmbedding', 'imdbRating', 'year'],
                    embeddingDimension: 128,
                    activationFunction: 'relu',
                    aggregator: 'pool',
                    epochs: 20,
                    batchSize: 64,
                    projectedFeatureDimension: 32
                })
            """
            run_cypher(train_graphsage_query, {})
            
            embedding_scaler = """
            MATCH (m:Movie)
            WITH m, 
                reduce(s = 0.0, x IN m.graphsageMoviePlotEmbedding | s + x^2) AS sumSquares
            SET m.normFactor = sqrt(sumSquares);
            """
            
            run_cypher(embedding_scaler, {})
            
            scaler_calculation_query = """
            MATCH (m:Movie)
            WHERE m.normFactor IS NOT NULL AND m.normFactor > 0
            SET m.graphsageMoviePlotEmbedding = 
                [x IN m.graphsageMoviePlotEmbedding | x / m.normFactor];
            """
            
            run_cypher(scaler_calculation_query, {})
        
        
            store_graphsage_embedding_query = """
            CALL gds.beta.graphSage.write(
                'movieGraphSageGraph',
                {
                    writeProperty: 'graphsageMoviePlotEmbedding',
                    modelName: 'movieGraphSageModel'
                }
            )
            """
            
            run_cypher(store_graphsage_embedding_query, {})
            
            run_cypher(drop_graphsage_query, {})
        
       
        
        graph_projection_query = """
        CALL gds.graph.project(
            'movieGraphSageGraph',
            {
                Movie: {
                    properties: ['graphsageMoviePlotEmbedding']
                },
                Person: {},
                Genre: {},
                User: {}
            },
            {
                ACTED_IN: {orientation: 'UNDIRECTED'},
                DIRECTED: {orientation: 'UNDIRECTED'},
                IN_GENRE: {orientation: 'UNDIRECTED'},
                RATED: {orientation: 'UNDIRECTED', properties: 'rating'}
            }
        );
        """
        run_cypher(graph_projection_query, {})
        
        query = """
        CALL gds.knn.stream('movieGraphSageGraph', {
            nodeLabels: ['Movie'],
            nodeProperties: ['graphsageMoviePlotEmbedding'],
            topK: 20
            })
        YIELD node1, node2, similarity
        WHERE ID(gds.util.asNode(node1)) IN $id_list AND NOT ID(gds.util.asNode(node2)) IN $id_list
        RETURN elementID(gds.util.asNode(node1)) AS source_id, gds.util.asNode(node1) as source, elementID(gds.util.asNode(node2)) AS target_id, gds.util.asNode(node2) as target, similarity
        ORDER BY similarity DESC
        LIMIT 10
        """
        results = run_cypher(query, {'id_list': id_list})
        return results
    return 'Method not supported'
    

In [34]:
# testing node2vec_recommendations
node2vec_recommendations = node_embeddings_movie_recommendations(movie_id_list, 'node2vec')
for record in node2vec_recommendations:
    print(record["target"]["title"], record["similarity"])

Embeddings already exist




Way We Were, The 0.7551795244216919
Frozen Ground, The 0.7511285543441772
Awfully Big Adventure, An 0.749812126159668
Draughtsman's Contract, The 0.748426079750061
Tin Cup 0.7457367777824402
My Sister's Keeper 0.7414945363998413
City of Ember 0.7409699559211731
Airplane! 0.740257740020752
Passenger 57 0.7401936650276184
Dylan Moran Live: What It Is 0.7399937510490417


#### Comment: GraphSage to be worked on, the usecase here might not be suitable

In [35]:
# testing graphsage
graphsage_recommendations = node_embeddings_movie_recommendations(movie_id_list, 'graphsage')
for record in graphsage_recommendations:
    print(record["target"]["title"], record["similarity"])



Truth About Cats & Dogs, The 0.9999999999999856
Perez Family, The 0.9999999999999695
Sphere 0.9999999999999575
Air Force One 0.9999999999999563
Craft, The 0.9999999999999543
Great White Hype, The 0.9999999999999516
Truth About Cats & Dogs, The 0.9999999999999477
Shallow Grave 0.999999999999944
Apostle, The 0.9999999999999294
Substitute, The 0.9999999999999287
