In [None]:
!pip install networkx pandas time


[31mERROR: Could not find a version that satisfies the requirement time (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for time[0m[31m
[0m

In [None]:
import itertools
import time

import networkx as nx
import pandas as pd

# Location of the pre-processed IMDb dataset (a Colab-style absolute path).
DATA_PATH = '/content/IMDb_Data_final_processed.csv'

# Load the dataset into a DataFrame; later cells read it through `data`.
data = pd.read_csv(DATA_PATH)

# Preview the first rows to sanity-check the columns.
data.head()


Unnamed: 0,Title,Director,Stars,IMDb-Rating,Category,Duration,Censor-board-rating,ReleaseYear
0,Top Gun: Maverick,Joseph Kosinski,"Tom Cruise, Jennifer Connelly, Miles Teller, V...",8.6,"Action,Drama",130min,UA,2022
1,Everything Everywhere All at Once,"Dan Kwan,",", Michelle Yeoh, Stephanie Hsu, Ke Huy Quan, J...",8.3,"Action,Adventure,Comedy",139min,R,2022
2,The Batman,Matt Reeves,"Robert Pattinson, Zoë Kravitz, Jeffrey Wright,...",7.9,"Action,Crime,Drama",176min,UA,2022
3,Jurassic Park,Steven Spielberg,"Sam Neill, Laura Dern, Jeff Goldblum, Richard ...",8.2,"Action,Adventure,Sci-Fi",127min,UA,1993
4,The Godfather,Francis Ford Coppola,"Marlon Brando, Al Pacino, James Caan, Diane Ke...",9.2,"Crime,Drama",175min,A,1972


In [None]:
# Extract relationships for directors, stars, and genres
def _explode_edges(frame, column):
    """Return one (Title, <column>) row per comma-separated entry in ``column``.

    Parameters:
        frame (pd.DataFrame): Source table containing 'Title' and ``column``.
        column (str): Name of the column holding comma-separated values.

    Returns:
        pd.DataFrame: Two-column frame ('Title', ``column``) with one entry per
        row. Rows with a missing title or value are dropped, entries are
        stripped of surrounding whitespace, and blank entries are discarded.
    """
    edges = frame[['Title', column]].dropna()
    entries = edges[column].str.split(',')
    edges = edges.assign(**{column: entries}).explode(column)
    edges[column] = edges[column].str.strip()
    # Values with a leading/trailing comma (e.g. "Dan Kwan," or ", Michelle Yeoh")
    # leave an empty piece after the split. Keeping it would create a single ''
    # entity shared by every such movie, so blank entries are removed here.
    return edges[edges[column] != '']


movie_director_edges = _explode_edges(data, 'Director')
movie_star_edges = _explode_edges(data, 'Stars')
movie_genre_edges = _explode_edges(data, 'Category')


In [None]:
# Create the graph
G = nx.Graph()

# Add nodes for movies, directors, stars, and genres
movies = data['Title'].dropna().unique()
directors = movie_director_edges['Director'].unique()
stars = movie_star_edges['Stars'].unique()
categories = movie_genre_edges['Category'].unique()

# NOTE: nodes are keyed by name only. Adding a name that already exists updates
# that node's attributes, so a name appearing in several roles keeps the
# node_type assigned last (movie < director < star < genre in this order).
G.add_nodes_from(movies, node_type='movie')
G.add_nodes_from(directors, node_type='director')
G.add_nodes_from(stars, node_type='star')
G.add_nodes_from(categories, node_type='genre')

# (edge frame, entity column, relation label), listed in the order the relations are added.
# NOTE: nx.Graph stores a single edge per node pair, so when two relations link
# the same pair the label written last replaces the earlier one.
relation_specs = [
    (movie_director_edges, 'Director', 'directed_by'),
    (movie_star_edges, 'Stars', 'stars'),
    (movie_genre_edges, 'Category', 'belongs_to'),
]

# Add edges for movie-director, movie-stars, and movie-genre relationships
for edges, column, relation in relation_specs:
    # Zipping the two columns yields the same (title, entity) pairs as iterrows()
    # without materialising a Series object for every row.
    G.add_edges_from(zip(edges['Title'], edges[column]), relation=relation)

# Connect movie nodes based on shared directors, stars, or genres
for edges, column, _relation in relation_specs:
    shared_relation = f'shared_{column.lower()}'
    for _entity, titles in edges.groupby(column)['Title']:
        # De-duplicate (order-preserving) so a title listed twice for the same
        # entity is never paired with itself, which would create a self-loop.
        unique_titles = list(dict.fromkeys(titles))
        G.add_edges_from(itertools.combinations(unique_titles, 2), relation=shared_relation)

# Visualize Graph Statistics
print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


Graph has 4257 nodes and 341737 edges.


In [None]:
#Auto-Detect Query Type Function
def detect_query_type(graph, query):
    """
    Look up the ``node_type`` of the node whose name equals ``query``.

    Parameters:
        graph (nx.Graph): Graph whose nodes carry a 'node_type' attribute.
        query (str): Node name to look up; surrounding whitespace is ignored.

    Returns:
        The node's 'node_type' value, or None when no node has that exact name
        or the node has no 'node_type' attribute. Matching is case-sensitive.
    """
    query = query.strip()
    # Direct node lookup instead of scanning every node in the graph.
    if query not in graph.nodes:
        return None
    return graph.nodes[query].get('node_type')


In [None]:
#Define Recommendation Function
def recommend_movies(graph, query, top_k=10):
    """
    Recommend movies based on the query.

    The query is resolved to a graph node by exact name. Recommendations are
    the movie nodes adjacent to that node, in the order the graph yields its
    neighbors; no scoring or ranking is applied. When the query itself is a
    movie it is placed first in the result.

    Parameters:
        graph (nx.Graph): The knowledge graph.
        query (str): The search term (surrounding whitespace is ignored).
        top_k (int): Maximum number of recommendations to return. Negative
            values are treated as 0; None returns every match.

    Returns:
        list: Up to ``top_k`` movie names when the query matches a typed node.
        tuple: ``(message, [])`` when the query does not match any typed node;
            callers distinguish the two cases with ``isinstance(..., tuple)``.
    """
    query = query.strip()
    query_type = detect_query_type(graph, query)
    if not query_type:
        return f"Query '{query}' did not match any known type.", []

    # Movie neighbours of the query node. The query itself is skipped so that a
    # self-loop on a movie node cannot list that movie twice.
    recommendations = [
        neighbor for neighbor in graph.neighbors(query)
        if neighbor != query and graph.nodes[neighbor].get('node_type') == 'movie'
    ]

    if query_type == 'movie':
        # Add the query movie itself as the top recommendation
        recommendations.insert(0, query)

    # Clamp so a negative top_k does not slice from the end of the list.
    limit = None if top_k is None else max(top_k, 0)
    return recommendations[:limit]


In [None]:
#Process Multiple Queries with Latency Tracking
queries = [
    "Christopher Nolan",  # Director
    "Leonardo Di Caprio",  # Actor
    "Action",             # Genre
    "Inception",          # Movie
    "Quentin Tarantino",  # Director
    "Scarlett Johansson", # Actor
    "Drama",              # Genre
    "The Dark Knight",    # Movie
    "Steven Spielberg",   # Director
    "Comedy"              # Genre
]

for query in queries:
    # perf_counter() is a monotonic, high-resolution clock intended for timing
    # short intervals; time.time() follows the wall clock, which can be coarse
    # or adjusted mid-measurement.
    start_time = time.perf_counter()
    recommendations = recommend_movies(G, query, top_k=7)
    end_time = time.perf_counter()
    latency = (end_time - start_time) * 1000  # Convert to milliseconds

    print(f"Query: {query}\nRecommendations:")
    if isinstance(recommendations, tuple):
        # Unmatched queries come back as (message, []); show the message only.
        print(recommendations[0])
    else:
        for rec in recommendations:
            print(f"- {rec}")
    print(f"Latency: {latency:.2f} ms\n")


Query: Christopher Nolan
Recommendations:
- The Dark Knight
- Interstellar
- Inception
- The Dark Knight Rises
- The Prestige
- Dunkirk
- Batman Begins
Latency: 0.33 ms

Query: Leonardo Di Caprio
Recommendations:
- Titanic
- Once Upon a Time in... Hollywood
- Inception
- The Departed
- The Wolf of Wall Street
- Django Unchained
- Shutter Island
Latency: 0.37 ms

Query: Action
Recommendations:
- Top Gun: Maverick
- Everything Everywhere All at Once
- The Batman
- Jurassic Park
- Spider-Man: No Way Home
- RRR (Rise Roar Revolt)
- Pirates of the Caribbean: The Curse of the Black Pearl
Latency: 0.99 ms

Query: Inception
Recommendations:
- Inception
- The Dark Knight
- Interstellar
- The Dark Knight Rises
- The Prestige
- Dunkirk
- Batman Begins
Latency: 0.25 ms

Query: Quentin Tarantino
Recommendations:
- Once Upon a Time in... Hollywood
- Pulp Fiction
- Inglourious Basterds
- Django Unchained
- Kill Bill: Vol. 1
- Reservoir Dogs
- The Hateful Eight
Latency: 0.11 ms

Query: Scarlett Johans