In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = pd.read_csv('IMDB-Movie-Data.csv')

# find top 3 movies with highest ratings in 2016
top_3_movies = data[data['Year'] == 2016].nlargest(3, 'Rating')
print(top_3_movies)

    Rank           Title                    Genre        Director  \
2    118          Dangal   Action|Biography|Drama   Nitesh Tiwari   
4     97   Kimi no na wa  Animation|Drama|Fantasy  Makoto Shinkai   
15   862  Koe no katachi  Animation|Drama|Romance    Naoko Yamada   

                                               Actors  Year  \
2   Aamir Khan| Sakshi Tanwar| Fatima Sana Shaikh|...  2016   
4   Ryunosuke Kamiki| Mone Kamishiraishi| Ryo Nari...  2016   
15     Miyu Irino| Saori Hayami| Aoi Yuki| Kensho Ono  2016   

    Runtime (Minutes)  Rating  Votes  Revenue (Millions)  Metascore  
2                 161     8.8  48969               11.15        NaN  
4                 106     8.6  34110                4.68       79.0  
15                129     8.4   2421                 NaN       80.0  


In [3]:
# find the actor generating the highest average revenue
actor_revenue = data.groupby('Actors')['Revenue (Millions)'].mean()
highest_avg_revenue = actor_revenue.idxmax()
print(highest_avg_revenue)

Daisy Ridley| John Boyega| Oscar Isaac| Domhnall Gleeson


In [4]:
# find the average rating of Emma Watson’s movies
emma_movies = data[data['Actors'].str.contains('Emma Watson')]
emma_avg_rating = emma_movies['Rating'].mean()
print(emma_avg_rating)

7.175000000000001


In [5]:
# find Top‐3 directors who collaborate with the most actors
top_3_directors = data.groupby('Director')['Actors'].nunique().nlargest(3)
print(top_3_directors)

Director
Ridley Scott          8
M. Night Shyamalan    6
Paul W.S. Anderson    6
Name: Actors, dtype: int64


In [6]:
# Top‐3 actors whose movies lead to the largest maximum gap of years
def max_gap(years):
    return max(years) - min(years)

# Split actors and calculate max_gap for each actor individually
actor_gaps = data['Actors'].str.split('|').explode().reset_index()
actor_gaps = actor_gaps.groupby('Actors')['index'].apply(lambda x: max_gap(data.loc[x, 'Year'])).reset_index()
actor_gaps.columns = ['Actors', 'max_gap']

# Sort by max_gap in descending order and get the top-3 actors
top_3_actors = actor_gaps.sort_values(by='max_gap', ascending=False).head(3)

print(top_3_actors)

               Actors  max_gap
823    Justin Theroux       10
2345       Tom Cruise       10
2380       Will Smith       10


In [8]:
from collections import defaultdict, deque
# Step 1: Initialize data structures
actor_collaborations = defaultdict(set)

# Step 2: Build the collaboration graph
for actors in data['Actors']:
    actors = actors.split('|')
    for actor in actors:
        actor_collaborations[actor].update(actors)

# Step 3: Initialize BFS
visited = set()
queue = deque(['Johnny Depp'])
collaborators = set()

# Step 4: Perform BFS
while queue:
    current_actor = queue.popleft()
    if current_actor not in visited:
        visited.add(current_actor)
        collaborators.update(actor_collaborations[current_actor])
        queue.extend(actor_collaborations[current_actor] - visited)

# Step 5: Remove Johnny Depp from the result
collaborators.discard('Johnny Depp')
print(collaborators)

{'Colin Farrell', ' Gabriella Wilde', ' John Corbett', ' Elijah Wood', ' Jared Leto', ' Graham McTavish', ' Mel Raido', ' Jake Gyllenhaal', ' Malin Akerman', ' Jessica Sula', ' Will Dalton', ' Eddie Baroo', ' Hugh Grant', ' Michelle Williams', ' Jason Momoa', 'Byung-hun Lee', ' Luke Kleintank', 'Gemma Arterton', 'Gabriella Wilde', ' Dustin Hoffman', 'Jessica Biel', ' Penn Badgley', ' Simon Helberg', 'Michael Shannon', ' Chris Cooper', ' Zooey Deschanel', ' Patricia Clarkson', 'Hugh Jackman', ' Gwyneth Paltrow', ' Tian Jing', ' Leonardo DiCaprio', ' Andy Lau', 'Ray Romano', ' Elizabeth Olsen', 'Steve Carell', ' Rodrigo Santoro', ' Dave Bautista', ' Dev Patel', ' Rose McGowan', ' Emmanuelle Chriqui', ' Felicity Jones', ' Christian Stolte', 'Sonoya Mizuno', 'Vera Farmiga', ' Julian Dennison', 'Topher Grace', ' Emilia Clarke', 'Scarlett Johansson', ' Jeremy Renner', ' Gerry Bednob', ' Jude Law', ' Su Elliot', 'Rainn Wilson', 'Christian Bale', ' Jai Courtney', ' Sheila Kelley', 'Annabeth Gi