### read file

In [11]:
import pandas

MULTI  = ("Genre", "Actors")
INNER_SEP  = "|"

def strip_all(iterable: list[str]) -> tuple:
    return tuple(map(lambda x: x.strip(), iterable))

datas = pandas.read_csv("IMDB-Movie-Data.csv")
for title in MULTI:
    datas[title] = datas[title].apply(lambda x: strip_all(x.split(INNER_SEP)))

actors_info = datas.explode("Actors").groupby("Actors")  # unpack the tuple data in the "actors" column

### Question 1

In [12]:
print(
    "Top-3 movies with the highest ratings in 2016:", 
    ", ".join(datas[datas["Year"] == 2016].dropna(subset="Rating").nlargest(3, "Rating", keep="all")["Title"])
)

Top-3 movies with the highest ratings in 2016: Dangal, Kimi no na wa, Koe no katachi


### Question 2

In [13]:
# average revenue = sum(revenue) / (the number of movies this actor has appeared in)
print(
    "The actor generating the highest average revenue:", 
    ", ".join(actors_info["Revenue (Millions)"].mean().nlargest(1, keep="all").dropna().index)
)

The actor generating the highest average revenue: Daisy Ridley, John Boyega


### Question 3

In [14]:
print(
    "The average rating of Emma Watson’s movies:", 
    actors_info.get_group("Emma Watson")["Rating"].mean(skipna=True)
)

The average rating of Emma Watson’s movies: 7.175000000000001


### Question 4

In [15]:
print(
    "Top-3 directors who collaborate with the most actors:", 
    ", ".join(
        datas.groupby("Director")["Actors"].apply(lambda x: len(set(x.explode()))).nlargest(3, keep="all").dropna().index
    )
)

Top-3 directors who collaborate with the most actors: Ridley Scott, M. Night Shyamalan, Danny Boyle, Paul W.S. Anderson


### Question 5

In [16]:
print(
    "Top-2 actors playing in the most genres of movies:", 
    ", ".join(
        actors_info["Genre"].apply(lambda x: len(set(x.explode()))).nlargest(2, keep="all").dropna().index
    )
)

Top-2 actors playing in the most genres of movies: Brad Pitt, Amy Adams, Chloe Grace Moretz, Hugh Jackman, Johnny Depp, Scarlett Johansson


### Question 6

In [17]:
print(
    "Top-3 actors whose movies lead to the largest maximum gap of years:", 
    ", ".join(
        actors_info["Year"].apply(lambda x: max(x) - min(x)).nlargest(3, keep="all").index
    )
)

Top-3 actors whose movies lead to the largest maximum gap of years: Abbie Cornish, Anne Hathaway, Audrey Tautou, Ben Kingsley, Ben Whishaw, Bob Balaban, Brad Pitt, Bryce Dallas Howard, Chiwetel Ejiofor, Christian Bale, Christopher Plummer, Denzel Washington, Dominic West, Dustin Hoffman, Edward Norton, Ellen Burstyn, Emily Blunt, Eva Green, Gerard Butler, Hugh Jackman, Jack Davenport, Jennifer Aniston, Jennifer Connelly, Jeremy Irons, Jessica Biel, Johnny Depp, Judi Dench, Justin Theroux, Kang-ho Song, Kate Bosworth, Kevin Spacey, Kirsten Dunst, Luke Wilson, Marion Cotillard, Mark Wahlberg, Matt Damon, Maya Rudolph, Meryl Streep, Michelle Monaghan, Morgan Freeman, Owen Wilson, Paula Patton, Rachel Weisz, Russell Crowe, Sacha Baron Cohen, Samuel L. Jackson, Scarlett Johansson, Steve Carell, Tom Cruise, Tom Hanks, Toni Collette, Will Ferrell, Will Smith


### Question 7

In [18]:
final_collaborate_actors_set = set(datas["Actors"][datas["Actors"].apply(lambda x: "Johnny Depp" in x)].explode())
final_collaborate_actors_list = list(final_collaborate_actors_set)

for actor in final_collaborate_actors_list:
    difference = set(datas["Actors"][datas["Actors"].apply(lambda x: actor in x)].explode()) - final_collaborate_actors_set  # get difference between 2 sets
    if difference:  # difference != {}
        final_collaborate_actors_list += difference
        final_collaborate_actors_set |= difference

final_collaborate_actors_list.remove("Johnny Depp")

print(
    "All actors who collaborate with Johnny Depp in direct and indirect ways:", 
    ", ".join(final_collaborate_actors_list), 
)

All actors who collaborate with Johnny Depp in direct and indirect ways: Rebecca Hall, Mia Wasikowska, Jack Davenport, Cillian Murphy, Timothy Spall, Lily-Rose Depp, Christian Stolte, Anne Hathaway, Penelope Cruz, Orlando Bloom, Alan Rickman, Morgan Freeman, Michelle Pfeiffer, Eva Green, Harley Quinn Smith, Jason Clarke, Geoffrey Rush, Keira Knightley, Armie Hammer, William Fichtner, Adam Brody, Christian Bale, Joel Edgerton, Tom Wilkinson, Ian McShane, Helena Bonham Carter, Dakota Johnson, Benedict Cumberbatch, Maria Dizzia, Christopher Evan Welch, Jeremy Renner, Allison Tolman, Jon Hamm, Tracy Letts, Jim Broadbent, Ben Affleck, Michael C. Hall, Jason Bateman, Scarlett Johansson, Ciaran Hinds, Javier Bardem, Eric Bana, Charlie Hunnam, Tom Hiddleston, Jessica Chastain, Jamie Bell, Michael Fassbender, Su Elliot, Rosamund Pike, David Oyelowo, Tom Felton, Brie Larson, Chris Evans, Amanda Seyfried, Justin Timberlake, Brian Caspe, Rose Byrne, Ben Whishaw, Sharlto Copley, Chris Hemsworth, Ja