In [None]:
!pip install neo4j pandas py2neo networkx matplotlib plotly

In [None]:
from neo4j import GraphDatabase
import json
from py2neo import Graph
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import networkx as nx
from py2neo import Graph

In [None]:
uri = "neo4j://localhost/:7687"
user = "neo4j"
password = "password"

In [None]:
with open('personnes.json', 'r', encoding='utf-8') as file:
    personnes_data = json.load(file)

In [None]:
with open('movies.json', 'r', encoding='utf-8') as file:
    movies_data = json.load(file)

In [None]:
class Neo4jConnection:
    def __init__(self, uri, user, password):
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self._driver.close()

    def query(self, query, parameters=None):
        with self._driver.session() as session:
            result = session.run(query, parameters)
            return list(result)

    def execute_write(self, func, *args, **kwargs):
        with self._driver.session() as session:
            return session.write_transaction(func, *args, **kwargs)

In [None]:
conn = Neo4jConnection(uri, user, password)

In [None]:
def create_person(tx, person):
    tx.run(
        """
        CREATE (p:Person {
            name: $name,
            birthday: $birthday,
            birth_place: $birth_place,
            bio: $bio,
            id: $id
        })
        """,
        name=person['nom'],
        birthday=person.get('naissance', {}).get('anniversaire', ''),
        birth_place=person.get('naissance', {}).get('lieu', ''),
        bio=person.get('bio', ''),
        id=person['_id']
    )


In [None]:
def create_movie(tx, movie):
    tx.run("""
    CREATE (m:Movie {
        title: $title,
        year: $year,
        country: $country,
        language: $language,
        duration: $duration,
        summary: $summary,
        poster: $poster,
        id: $id
    })
    """,
    title=movie['titre'],
    year=movie['annee'],
    country=", ".join(movie['pays']) if isinstance(movie['pays'], list) else movie['pays'],
    language=movie.get('langue', ''),
    duration=movie['duree'],
    summary=movie['resume'],
    poster=movie.get('poster', ''),
    id=movie['_id'])

    # Create relationships for genres
    for genre in movie['genre']:
        tx.run("""
        MATCH (m:Movie {id: $id})
        MERGE (g:Genre {name: $genre})
        MERGE (m)-[:HAS_GENRE]->(g)
        """, id=movie['_id'], genre=genre)

    # Create relationships for director
    realisateur = movie.get('realisateur')
    if realisateur:
        tx.run("""
        MATCH (m:Movie {id: $id})
        MERGE (d:Director {id: $director_id, name: $director_name})
        MERGE (m)-[:DIRECTED_BY]->(d)
        """, id=movie['_id'], director_id=realisateur['_id'], director_name=realisateur['__text'])

    # Create relationships for writers
    scenaristes = movie.get('scenariste', [])
    if isinstance(scenaristes, list):
        for writer in scenaristes:
            tx.run("""
            MATCH (m:Movie {id: $id})
            MERGE (w:Writer {name: $writer})
            MERGE (m)-[:WRITTEN_BY]->(w)
            """, id=movie['_id'], writer=writer)
    elif isinstance(scenaristes, str):
        tx.run("""
        MATCH (m:Movie {id: $id})
        MERGE (w:Writer {name: $writer})
        MERGE (m)-[:WRITTEN_BY]->(w)
        """, id=movie['_id'], writer=scenaristes)

    # Create relationships for roles
    if 'role' in movie:
        for role in movie['role']:
            if isinstance(role, dict) and 'acteur' in role:
                actor = role['acteur']
                if isinstance(actor, dict):
                    tx.run("""
                    MATCH (m:Movie {id: $id})
                    MERGE (a:Actor {id: $actor_id, name: $actor_name})
                    MERGE (m)-[:HAS_ROLE {character: $character}]->(a)
                    """,
                    id=movie['_id'],
                    actor_id=actor.get('_id', ''),
                    actor_name=actor.get('__text', ''),
                    character=role['personnage'])
                elif isinstance(actor, str):
                    tx.run("""
                    MATCH (m:Movie {id: $id})
                    MERGE (a:Actor {name: $actor_name})
                    MERGE (m)-[:HAS_ROLE {character: $character}]->(a)
                    """,
                    id=movie['_id'],
                    actor_name=actor,
                    character=role['personnage'])

In [None]:
for person in personnes_data['personnes']['personne']:
    conn.execute_write(create_person, person)

In [None]:
for movie in movies_data['films']['film']:
    conn.execute_write(create_movie, movie)

In [None]:
def find_similar_movies(conn, movie_title, limit=10):
    query = """
    MATCH (m:Movie {title: $title})
    OPTIONAL MATCH (m)-[:HAS_GENRE]->(g:Genre)<-[:HAS_GENRE]-(similar:Movie)
    OPTIONAL MATCH (m)-[:DIRECTED_BY]->(d:Director)<-[:DIRECTED_BY]-(similar:Movie)
    OPTIONAL MATCH (m)-[:WRITTEN_BY]->(w:Writer)<-[:WRITTEN_BY]-(similar:Movie)
    OPTIONAL MATCH (m)-[:HAS_ROLE]->(a:Actor)<-[:HAS_ROLE]-(similar:Movie)
    WHERE m <> similar
    RETURN similar.title AS title,
           COUNT(DISTINCT g) AS shared_genres,
           COUNT(DISTINCT d) AS shared_directors,
           COUNT(DISTINCT w) AS shared_writers,
           COUNT(DISTINCT a) AS shared_actors,
           (COUNT(DISTINCT g) + COUNT(DISTINCT d) + COUNT(DISTINCT w) + COUNT(DISTINCT a)) AS similarity_score
    ORDER BY similarity_score DESC, similar.title ASC
    LIMIT $limit
    """
    result = conn.query(query, parameters={"title": movie_title, "limit": limit})
    return [record["title"] for record in result]

similar_movies = find_similar_movies(conn, "The Incredibles", 10)
print("Similar movies:", similar_movies)

In [None]:
graph = Graph(uri, auth=(user, password))
def get_movie_graph(movie_title):
    query = """
    MATCH (m:Movie {title: $title})
    OPTIONAL MATCH (m)-[:HAS_GENRE]->(g:Genre)
    OPTIONAL MATCH (m)-[:DIRECTED_BY]->(d:Director)
    OPTIONAL MATCH (m)-[:WRITTEN_BY]->(w:Writer)
    OPTIONAL MATCH (m)-[:HAS_ROLE]->(a:Actor)
    RETURN m, COLLECT(DISTINCT g) AS genres, COLLECT(DISTINCT d) AS directors,
           COLLECT(DISTINCT w) AS writers, COLLECT(DISTINCT a) AS actors
    """
    result = graph.run(query, title=movie_title).data()

    return result[0] if result else None

def visualize_movie_graph(movie_title):
    movie_data = get_movie_graph(movie_title)
    if not movie_data:
        print(f"Movie '{movie_title}' not found.")
        return

    G = nx.Graph()

    movie_node = movie_data['m']['title']
    G.add_node(movie_node, label='Movie')

    for genre in movie_data['genres']:
        if genre:
            G.add_node(genre['name'], label='Genre')
            G.add_edge(movie_node, genre['name'])

    for director in movie_data['directors']:
        if director:
            G.add_node(director['name'], label='Director')
            G.add_edge(movie_node, director['name'])

    for writer in movie_data['writers']:
        if writer:
            G.add_node(writer['name'], label='Writer')
            G.add_edge(movie_node, writer['name'])

    for actor in movie_data['actors']:
        if actor:
            G.add_node(actor['name'], label='Actor')
            G.add_edge(movie_node, actor['name'])

    pos = nx.spring_layout(G, k=0.5)
    labels = nx.get_node_attributes(G, 'label')
    color_map = {
        'Movie': 'blue',
        'Genre': 'green',
        'Director': 'red',
        'Writer': 'purple',
        'Actor': 'orange'
    }
    node_colors = [color_map[labels[node]] for node in G.nodes()]

    plt.figure(figsize=(12, 8))
    plt.gca().set_facecolor('#f0f0f0')
    nx.draw(G, pos, with_labels=True, node_color=node_colors, node_size=3000, font_size=8, font_color='white', edge_color='#cccccc')
    plt.title(f"Graph of '{movie_title}' and its Relationships", fontsize=15)
    plt.show()

# Visualize the graph for a specific movie
visualize_movie_graph("The Incredibles")

In [150]:
def visualize_movies_relation(main_movie_title, similar_movies):
    G = nx.Graph()

    # Add main movie node and its relations
    main_movie_data = get_movie_graph(main_movie_title)
    if not main_movie_data:
        print(f"Movie '{main_movie_title}' not found.")
        return

    main_movie_node = main_movie_data['m']['title']
    G.add_node(main_movie_node, label='Movie')

    for genre in main_movie_data['genres']:
        if genre:
            G.add_node(genre['name'], label='Genre')
            G.add_edge(main_movie_node, genre['name'])

    for director in main_movie_data['directors']:
        if director:
            G.add_node(director['name'], label='Director')
            G.add_edge(main_movie_node, director['name'])

    for writer in main_movie_data['writers']:
        if writer:
            G.add_node(writer['name'], label='Writer')
            G.add_edge(main_movie_node, writer['name'])

    for actor in main_movie_data['actors']:
        if actor:
            G.add_node(actor['name'], label='Actor')
            G.add_edge(main_movie_node, actor['name'])

    # Add similar movies nodes and their relations
    for similar_movie in similar_movies:
        similar_movie_data = get_movie_graph(similar_movie)
        if not similar_movie_data:
            continue

        similar_movie_node = similar_movie_data['m']['title']
        G.add_node(similar_movie_node, label='Movie')
        G.add_edge(main_movie_node, similar_movie_node, label='Similar')

        for genre in similar_movie_data['genres']:
            if genre:
                G.add_node(genre['name'], label='Genre')
                G.add_edge(similar_movie_node, genre['name'])

        for director in similar_movie_data['directors']:
            if director:
                G.add_node(director['name'], label='Director')
                G.add_edge(similar_movie_node, director['name'])

        for writer in similar_movie_data['writers']:
            if writer:
                G.add_node(writer['name'], label='Writer')
                G.add_edge(similar_movie_node, writer['name'])

        for actor in similar_movie_data['actors']:
            if actor:
                G.add_node(actor['name'], label='Actor')
                G.add_edge(similar_movie_node, actor['name'])

    # Calculate betweenness centrality
    centrality = nx.betweenness_centrality(G)
    most_central_element = max(centrality, key=centrality.get)

    # Remove nodes node with centrality biggest make G2
    G2 = G.copy()
    G2.remove_node(most_central_element)

    centrality = nx.betweenness_centrality(G2)
    most_central_element = max(centrality, key=centrality.get)
    print(f"The most central element is '{most_central_element}' with a betweenness centrality of {centrality[most_central_element]}'")

    non_movie_centralities = {node: cent for node, cent in centrality.items() if G.nodes[node]['label'] != 'Movie'}
    if non_movie_centralities:
        most_central_non_movie_element = max(non_movie_centralities, key=non_movie_centralities.get)
        print(f"The most central element that is not a movie is '{most_central_non_movie_element}' with a betweenness centrality of {non_movie_centralities[most_central_non_movie_element]}'")
    else:
        print("No non-movie elements found in the graph.")


    # Remove nodes not connected to any other movie node
    nodes_to_remove = []
    for node in G.nodes:
        if G.degree(node) == 0:
            nodes_to_remove.append(node)
        if G.degree(node) == 1 and list(G.neighbors(node))[0] == main_movie_node:
            nodes_to_remove.append(node)
        # remove actor nodes with no connections for every movie node
        if G.nodes[node]['label'] == 'Actor' and G.degree(node) == 1:
            nodes_to_remove.append(node)
        if G.nodes[node]['label'] == 'Writer' and G.degree(node) == 1:
            nodes_to_remove.append(node)
        if G.nodes[node]['label'] == 'Director' and G.degree(node) == 1:
            nodes_to_remove.append(node)
        if G.nodes[node]['label'] == 'Genre' and G.degree(node) == 1:
            nodes_to_remove.append(node)
    G.remove_nodes_from(nodes_to_remove)

    # Calculate node degrees
    degrees = dict(G.degree())

    pos = nx.spring_layout(G, k=0.5)
    edge_trace = []
    node_trace = go.Scatter(
        x=[],
        y=[],
        text=[],
        mode='markers+text',
        hoverinfo='text',
        textposition='top center',
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            size=[],
            color=[],
            colorbar=dict(
                thickness=15,
                title='Node Connections',
                xanchor='left',
                titleside='right'
            )
        )
    )

    for node in G.nodes:
        x, y = pos[node]
        node_trace['x'] += (x,)
        node_trace['y'] += (y,)
        node_trace['text'] += (node,)
        node_trace['marker']['size'] += (5 + degrees[node],)
        node_trace['marker']['color'] += (degrees[node],)

    for edge in G.edges:
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_trace.append(
            go.Scatter(
                x=[x0, x1, None],
                y=[y0, y1, None],
                line=dict(width=0.5, color='#888'),
                hoverinfo='none',
                mode='lines'
            )
        )

    fig = go.Figure(data=edge_trace + [node_trace],
                    layout=go.Layout(
                        title=f"Graph of '{main_movie_title}' and Its Similar Movies",
                        titlefont_size=16,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[dict(
                            text="Movie Relationships",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002
                        )],
                        xaxis=dict(showgrid=False, zeroline=False),
                        yaxis=dict(showgrid=False, zeroline=False))
                    )

    fig.show()

In [154]:
# Find similar movies and visualize the graph
similar_movies = find_similar_movies(conn, "Superman", 10)
print("Similar movies:", similar_movies)
visualize_movies_relation("Superman", similar_movies)


Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.



Similar movies: ['Superman', 'Superman II', '2012', 'Aliens', 'Armageddon', 'Back to the Future Part II', 'G.I. Joe: The Rise of Cobra', 'Independence Day', 'Iron Man', 'Journey to the Center of the Earth']
The most central element is 'Sci-Fi' with a betweenness centrality of 0.2111563028604614'
The most central element that is not a movie is 'Sci-Fi' with a betweenness centrality of 0.2111563028604614'
