In [5]:
from py2neo import Graph

import os

# Connection settings are read from the environment when present so that real
# credentials never have to be typed into (and committed with) the notebook.
# The fallbacks are the local-development values this notebook used originally.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

# Open the connection to Neo4j over Bolt.
graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# Smoke-test the connection: count every node in the database and show the result.
result = graph.run("MATCH (n) RETURN COUNT(n)").data()
print(result)

[{'COUNT(n)': 56849}]


In [39]:
import pandas as pd

df_results = pd.read_csv("../data/cache/test_movies_query.csv")
# Movie identifiers to look up; the CSV is expected to provide them in a 'tconst' column.
tconst_values = df_results["tconst"].tolist()

# Cypher templates. The movie id is bound as the $tconst query parameter instead of
# being formatted into the query text, so ids containing quotes cannot break (or
# inject into) the query. Every template returns its result under the alias `name`
# so that a single helper can read all three.
# NOTE(review): each query is scoped to one movie, so count(*) is presumably 1 for
# every name and the ORDER BY does not pick a meaningful "top" entry - confirm
# whether a different ranking was intended.
PRODUCTION_CO_QUERY = """
MATCH (m:Movie)-[:PRODUCED_BY]->(pc:ProductionCompany)
WHERE m.tconst = $tconst
WITH pc.name AS name, count(*) AS count
ORDER BY count DESC
LIMIT 1
RETURN name
"""

DIRECTOR_QUERY = """
MATCH (d:Director)-[:DIRECTED]->(m:Movie)
WHERE m.tconst = $tconst
WITH d.name AS name, count(*) AS count
ORDER BY count DESC
LIMIT 1
RETURN name
"""

TOP_ACTORS_QUERY = """
MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
WHERE m.tconst = $tconst
WITH a.name AS name, count(*) AS appearances
ORDER BY appearances DESC
LIMIT 2
RETURN name
"""


def _fetch_names(query, tconst):
    """Run one of the templates above for a single movie.

    Args:
        query: Cypher text that uses the ``$tconst`` parameter and returns a
            column aliased ``name``.
        tconst: Identifier of the movie to look up.

    Returns:
        The returned names with surrounding whitespace stripped, in query order.
        Rows whose name is null are skipped; the list is empty when nothing matched.
    """
    rows = graph.run(query, {"tconst": tconst}).data()
    return [row["name"].strip() for row in rows if row["name"] is not None]


directors = []
actors = []
production_cos = []

# Look up each movie in turn. Exactly one entry is appended to every list per
# movie (None when no director / production company was found), so the three
# lists stay index-aligned with tconst_values and can be zipped safely.
for tconst in tconst_values:
    studio_names = _fetch_names(PRODUCTION_CO_QUERY, tconst)
    production_cos.append(studio_names[0] if studio_names else None)

    director_names = _fetch_names(DIRECTOR_QUERY, tconst)
    directors.append(director_names[0] if director_names else None)

    # Up to two actor names for this movie (possibly an empty list).
    actors.append(_fetch_names(TOP_ACTORS_QUERY, tconst))

# Print one summary line per movie.
for director, production_co, movie_actors in zip(directors, production_cos, actors):
    print(f"Director: {director}, Production Company: {production_co}, Actor: {movie_actors}")


Director: Paul W. S. Anderson, Production Company: 20th Century Fox, Actor: ['Ian Whyte', 'Tommy Flanagan']
Director: Todd Solondz, Production Company: Killer Films, Actor: ['Cynthia Stevenson', 'Gerry Becker']
Director: George Armitage, Production Company: Caravan Pictures, Actor: ['Dan Aykroyd', 'Hank Azaria']
Director: Tom Savini, Production Company: Columbia Pictures, Actor: ['Bill Moseley', 'Tony Todd']
Director: Jon Amiel, Production Company: Regency Enterprises, Actor: ['Sean Connery', 'Ving Rhames']
Director: Rachel Talalay, Production Company: New Line Cinema, Actor: ['Robert Englund', 'Yaphet Kotto']
Director: Tom Hanks, Production Company: 20th Century Fox, Actor: ['Charlize Theron', 'Liv Tyler']
Director: Martin Campbell, Production Company: Columbia Pictures, Actor: ['Antonio Banderas', 'Julio Oscar Mechoso']
Director: Alain Berliner, Production Company: Lakeshore Entertainment, Actor: ['William Fichtner', 'Stellan Skarsgård']
Director: Malcolm D. Lee, Production Company: 