In [1]:
import pandas as pd
from ast import literal_eval
from py2neo import Graph, Node, Relationship



def get_unique_strings(list_of_lists):
    """Return the distinct strings found in an iterable of iterables.

    Non-string items are ignored. The result keeps first-seen order so that
    repeated runs over the same data yield the same list (a plain ``set``
    would give an order that varies with string hash randomization).

    Args:
        list_of_lists: Iterable whose elements are themselves iterables
            (e.g. a pandas Series of lists).

    Returns:
        A list of unique ``str`` items in order of first appearance.
    """
    # dict keys are unique and preserve insertion order, giving an ordered
    # de-duplication in a single pass.
    return list(dict.fromkeys(
        item
        for sublist in list_of_lists
        for item in sublist
        if isinstance(item, str)
    ))



# Load the movie catalogue. Several columns are stored as text and are
# converted to Python lists below.
df = pd.read_csv('../data/movies.csv')

# Columns serialized as Python literals: parse them with literal_eval.
for column in ('reviews', 'review_user', 'review_score', 'publishers'):
    df[column] = df[column].apply(literal_eval)

# Columns stored as ", "-separated text: split them into lists of names.
for column in ('directors', 'genre', 'actors'):
    df[column] = df[column].apply(lambda text: text.split(', '))


# Distinct values across all movies, used to create one node per value.
unique_directors = get_unique_strings(df['directors'])
unique_genres = get_unique_strings(df['genre'])
unique_actors = get_unique_strings(df['actors'])
unique_publishers = get_unique_strings(df['publishers'])


# Flatten every (review, score) pair of every movie into one list of dicts.
review_dict_list = [
    {'review': text, 'rating': score}
    for movie_reviews, movie_scores in zip(df['reviews'], df['review_score'])
    for text, score in zip(movie_reviews, movie_scores)
]



# Open the connection to the local Neo4j instance.
# NOTE(review): the credentials are hard-coded here; consider loading them
# from the environment or a config file instead.
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))

print('Creating Genre, Director, Actor and Publisher nodes...')

# Lookup tables mapping each distinct value to the node created for it, so
# that relationships can later be attached to the existing node.
genre_nodes = {}
director_nodes = {}
actor_nodes = {}
publisher_nodes = {}

# (node label, property key, distinct values, lookup table to fill)
lookup_specs = (
    ("Genre", "Genre", unique_genres, genre_nodes),
    ("Director", "Name", unique_directors, director_nodes),
    ("Actor", "Name", unique_actors, actor_nodes),
    ("Publisher", "Name", unique_publishers, publisher_nodes),
)

for label, property_key, values, lookup in lookup_specs:
    for value in values:
        node = Node(label, **{property_key: value})
        graph.create(node)
        lookup[value] = node

print('Genres, Actors, Directors and Publisher nodes complete!')

    
# Create nodes for movies, reviews, and users, and link each movie to the
# genre/director/actor/publisher nodes created earlier.

# One User node per username: without this cache a user who wrote several
# reviews would be stored as several unrelated User nodes.
user_nodes = {}

count = 0  # stays 0 when df has no rows
for count, (index, row) in enumerate(df.iterrows(), start=1):
    # Create a movie node
    movie_node = Node(
        "Movie",
        Id=row['id'],
        Title=row['title'],
        Rating=row['rating'],
        Summary=row['summary'],
        Release_year=row['release year'],
        Runtime=row['runtime'],
        Certificate=row['certificate'],
        Poster=row['Poster'],
        Price=row['price'],
    )
    graph.create(movie_node)
    print(f"Movie {count} done!")

    # Create review and user nodes and relationships:
    # (User)-[:WROTE]->(Review)-[:FOR]->(Movie)
    for review, rating, user in zip(row['reviews'], row['review_score'], row['review_user']):
        review_node = Node("Review", Content=review, Rating=rating)
        graph.create(review_node)
        graph.create(Relationship(review_node, "FOR", movie_node))

        # Reuse the node already created for this username, if any.
        user_node = user_nodes.get(user)
        if user_node is None:
            user_node = Node("User", Username=user)
            graph.create(user_node)
            user_nodes[user] = user_node

        graph.create(Relationship(user_node, "WROTE", review_node))

    # Link the movie to its genres: (Movie)-[:HAS]->(Genre)
    for genre in row['genre']:
        if genre in genre_nodes:
            graph.create(Relationship(movie_node, "HAS", genre_nodes[genre]))

    # Link directors to the movie: (Director)-[:INSTRUCTED]->(Movie)
    for director in row['directors']:
        if director in director_nodes:
            graph.create(Relationship(director_nodes[director], "INSTRUCTED", movie_node))

    # Link actors and the movie in both directions:
    # (Actor)-[:STARRED_IN]->(Movie) and (Movie)-[:FEATURES]->(Actor)
    for actor in row['actors']:
        if actor in actor_nodes:
            actor_node = actor_nodes[actor]
            graph.create(Relationship(actor_node, "STARRED_IN", movie_node))
            graph.create(Relationship(movie_node, "FEATURES", actor_node))

    # Link publishers to the movie: (Publisher)-[:PUBLISHED]->(Movie)
    for publisher in row['publishers']:
        if publisher in publisher_nodes:
            graph.create(Relationship(publisher_nodes[publisher], "PUBLISHED", movie_node))

print('Graph completed!')

Creating Genre, Director, Actor and Publisher nodes...
Genres, Actors, Directors and Publisher nodes complete!
Movie 1 done!
Movie 2 done!
Movie 3 done!
Movie 4 done!
Movie 5 done!
Movie 6 done!
Movie 7 done!
Movie 8 done!
Movie 9 done!
Movie 10 done!
Movie 11 done!
Movie 12 done!
Movie 13 done!
Movie 14 done!
Movie 15 done!
Movie 16 done!
Movie 17 done!
Movie 18 done!
Movie 19 done!
Movie 20 done!
Movie 21 done!
Movie 22 done!
Movie 23 done!
Movie 24 done!
Movie 25 done!
Movie 26 done!
Movie 27 done!
Movie 28 done!
Movie 29 done!
Movie 30 done!
Movie 31 done!
Movie 32 done!
Movie 33 done!
Movie 34 done!
Movie 35 done!
Movie 36 done!
Movie 37 done!
Movie 38 done!
Movie 39 done!
Movie 40 done!
Movie 41 done!
Movie 42 done!
Movie 43 done!
Movie 44 done!
Movie 45 done!
Movie 46 done!
Movie 47 done!
Movie 48 done!
Movie 49 done!
Movie 50 done!
Movie 51 done!
Movie 52 done!
Movie 53 done!
Movie 54 done!
Movie 55 done!
Movie 56 done!
Movie 57 done!
Movie 58 done!
Movie 59 done!
Movie 60 done

Movie 513 done!
Movie 514 done!
Movie 515 done!
Movie 516 done!
Movie 517 done!
Movie 518 done!
Movie 519 done!
Movie 520 done!
Movie 521 done!
Movie 522 done!
Movie 523 done!
Movie 524 done!
Movie 525 done!
Movie 526 done!
Movie 527 done!
Movie 528 done!
Movie 529 done!
Movie 530 done!
Movie 531 done!
Movie 532 done!
Movie 533 done!
Movie 534 done!
Movie 535 done!
Movie 536 done!
Movie 537 done!
Movie 538 done!
Movie 539 done!
Movie 540 done!
Movie 541 done!
Movie 542 done!
Movie 543 done!
Movie 544 done!
Movie 545 done!
Movie 546 done!
Movie 547 done!
Movie 548 done!
Movie 549 done!
Movie 550 done!
Movie 551 done!
Movie 552 done!
Movie 553 done!
Movie 554 done!
Movie 555 done!
Movie 556 done!
Movie 557 done!
Movie 558 done!
Movie 559 done!
Movie 560 done!
Movie 561 done!
Movie 562 done!
Movie 563 done!
Movie 564 done!
Movie 565 done!
Movie 566 done!
Movie 567 done!
Movie 568 done!
Movie 569 done!
Movie 570 done!
Movie 571 done!
Movie 572 done!
Movie 573 done!
Movie 574 done!
Movie 57