In [1]:
import csv
from collections import defaultdict
from dotenv import load_dotenv
import os
import requests


In [2]:
# Let python-dotenv populate the process environment, then read the API key
# used by the TMDB request helpers (None when the variable is not set).
load_dotenv()
API_KEY = os.environ.get('API_KEY')

In [3]:
import pickle

def pickling(path, data):
    """Serialize ``data`` with pickle into the file at ``path``.

    Args:
        path (str): Destination file; it is opened in 'wb' mode, so an
            existing file is overwritten.
        data: Any picklable object.
    """
    # The context manager closes (and therefore flushes) the handle even when
    # pickle.dump raises; previously the file object was never closed.
    with open(path, 'wb') as file:
        pickle.dump(data, file)

def unpickling(path):
    """Load and return the object pickled in the file at ``path``.

    Args:
        path (str): File to read; opened in 'rb' mode.

    Returns:
        The unpickled object.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file, so only call this on files produced by a trusted source.
    # The context manager makes sure the handle is closed after loading.
    with open(path, 'rb') as file:
        return pickle.load(file)

In [4]:
def get_movie_id(movie_name, movie_year):
    """Gets the movie_id from TMDB using the movie_name and year

    Args:
        movie_name (str): Movie's name
        movie_year (str): Movie's release year

    Returns:
        int: Movie TMDB id (taken from the first search result)

    Raises:
        requests.HTTPError: If TMDB answers with an error status
        IndexError: If the search returns no results
    """

    # Every query argument goes through ``params`` so requests URL-encodes all
    # of them, instead of hand-building half of the query string in the URL.
    r = requests.get(
        'https://api.themoviedb.org/3/search/movie',
        params={
            'api_key': API_KEY,
            'language': 'en-US',
            'page': 1,
            'include_adult': 'false',
            'query': movie_name,
            'year': movie_year,
        },
        timeout=10,  # do not hang forever on a stalled connection
    )
    # Surface auth/quota/server errors here rather than as a KeyError below.
    r.raise_for_status()

    results = r.json()['results']
    if not results:
        # Same exception type an empty list produced before, now with context.
        raise IndexError(f'No TMDB result for {movie_name!r} ({movie_year})')
    return results[0]['id']

def get_movie_actors(movie_name, movie_year, limit=20):
    """Returns the top 'limit' movie actors

    Args:
        movie_name (str): Movie's name
        movie_year (str): Movie's release year
        limit (int, optional): Actors limit. Defaults to 20.

    Returns:
        str[]: A list of movie actors, in the order TMDB lists the cast

    Raises:
        requests.HTTPError: If the credits request gets an error status.
        Any exception raised by get_movie_id propagates unchanged.
    """

    movie_id = get_movie_id(movie_name, movie_year)

    # Query arguments are passed via ``params`` (this also drops the dangling
    # '&' the hand-built URL used to end with).
    r = requests.get(
        f'https://api.themoviedb.org/3/movie/{movie_id}/credits',
        params={'api_key': API_KEY, 'language': 'en-US'},
        timeout=10,  # do not hang forever on a stalled connection
    )
    r.raise_for_status()

    # Keep only the first ``limit`` cast entries and project out the names.
    return [actor['name'] for actor in r.json()['cast'][:limit]]

In [5]:
# Spot-check the TMDB helpers on a single title before using them in bulk.
actors = get_movie_actors(movie_name="mother !", movie_year="2017")
actors

['Jennifer Lawrence',
 'Javier Bardem',
 'Ed Harris',
 'Michelle Pfeiffer',
 'Brian Gleeson',
 'Domhnall Gleeson',
 'Jovan Adepo',
 'Amanda Chiu',
 'Patricia Summersett',
 'Eric Davis',
 'Raphael Grosz-Harvey',
 'Emily Hampshire',
 'Abraham Aronofsky',
 'Luis Oliva',
 'Stephanie Ng Wan',
 'Chris Gartin',
 'Stephen McHattie',
 'Ambrosio De Luca',
 'Gregg Bello',
 'Arthur Holden']

In [6]:
%%time
# Read every row of the CSV into memory. The first row displayed later in this
# notebook looks like ['1', 'Avatar', '2009', '$2881837181'].
with open('top_movies_list.csv', newline='') as file:
    # csv.reader yields one list of strings per row, so list() replaces the
    # manual append loop (whose enumerate index was never used).
    movies_list = list(csv.reader(file))

print(len(movies_list))

1200
CPU times: total: 31.2 ms
Wall time: 3.99 ms


In [7]:
# Second column of the first CSV row (displayed output: 'Avatar').
movies_list[0][1]

'Avatar'

In [11]:
# Load previously cached cast data; the code that wrote data.pckl is not
# visible here. The aggregation loop indexes it as actors_data[idx] with idx
# being the row position in movies_list, so it is presumably one collection of
# actor names per CSV row — TODO confirm.
actors_data = unpickling("data.pckl")

In [13]:
# Whole first row: index 1 and index 3 are the fields the aggregation reads.
movies_list[0]

['1', 'Avatar', '2009', '$2881837181']

In [15]:
%%time

# Build two per-actor aggregates over all listed movies:
#   actors_map        actor name -> sum of the parsed gross of their movies
#   actors_movies_map actor name -> titles of those movies, in list order
actors_map = defaultdict(int)
actors_movies_map = defaultdict(list)

for idx, movie in enumerate(movies_list):
    try:
        # Cast data is looked up by the movie's position in movies_list.
        movie_actors = list(actors_data[idx])
        title = movie[1]
    except (LookupError, TypeError):
        # Best-effort: report rows without a usable cast entry/title and move
        # on, without leaving a half-applied update for that movie.
        print(idx, movie)
        continue

    # The gross is identical for every actor of the movie, so parse it once
    # instead of once per actor.
    try:
        gross = int(movie[3][1:])  # [1:] removes the leading $ sign
    except (LookupError, TypeError, ValueError):
        # Missing or unparsable gross: the movie still counts as a credit,
        # it just contributes nothing to the sum.
        gross = 0

    for actor in movie_actors:
        actors_map[actor] += gross
        actors_movies_map[actor].append(title)

pickling("actors_map.pckl", actors_map)
pickling("actors_movies_map.pckl", actors_movies_map)

CPU times: total: 46.9 ms
Wall time: 57.8 ms


In [16]:
def print_actors_map(actors_map, actors_movies_map, limit=20):
    """Print the ``limit`` highest-valued actors, one line per actor.

    Each line holds the 0-based rank, the actor name, the actor's value from
    ``actors_map`` and the actor's movie list.

    Args:
        actors_map (dict): Actor name -> numeric value to rank by.
        actors_movies_map (dict): Actor name -> list of movie titles.
        limit (int, optional): Maximum number of actors printed. Defaults to 20.
    """
    ranked = sorted(actors_map.items(), key=lambda item: item[1], reverse=True)

    # Slicing caps the output at exactly ``limit`` rows. The previous
    # counter-based loop tested ``idx > limit`` after incrementing and so
    # printed limit + 1 rows.
    for idx, (actor_name, profit) in enumerate(ranked[:max(limit, 0)]):
        print(idx, actor_name, profit, actors_movies_map[actor_name])

In [20]:
from IPython.display import display, HTML
import pandas as pd
import numpy as np

def pretty_print(df):
    """Display ``df`` as an HTML table, turning escaped newlines into <br>.

    Args:
        df: Object exposing ``to_html()`` (a pandas DataFrame in this notebook).

    Returns:
        Whatever ``display`` returns.
    """
    markup = df.to_html()
    # Replace the literal two-character sequence backslash + n with an HTML
    # line break (presumably how to_html renders newlines inside cells —
    # TODO confirm).
    markup = markup.replace("\\n", "<br>")
    rendered = HTML(markup)
    return display(rendered)

def print_actors_map_pd(actors_map, actors_movies_map, limit=200):
    """Show the ``limit`` highest-valued actors as a table via pretty_print.

    Args:
        actors_map (dict): Actor name -> numeric value to rank by.
        actors_movies_map (dict): Actor name -> list of movie titles.
        limit (int, optional): Maximum number of rows shown. Defaults to 200.
    """
    ranked = sorted(actors_map.items(), key=lambda item: item[1], reverse=True)

    # Slicing yields exactly ``limit`` rows. The previous counter-based loop
    # tested ``idx > limit`` after incrementing and so emitted limit + 1 rows.
    data = [
        # One movie per line inside the cell; the value gets thousands separators.
        [actor_name, f"${profit:,}", "\n".join(actors_movies_map[actor_name])]
        for actor_name, profit in ranked[:max(limit, 0)]
    ]

    df = pd.DataFrame(data, columns=["Actor Name", "Profit", "Movies"])
    pretty_print(df)

In [21]:
# Render the ranking table using the function's default row limit.
print_actors_map_pd(actors_map, actors_movies_map)

Unnamed: 0,Actor Name,Profit,Movies
0,Samuel L. Jackson,"$16,338,817,882",The Avengers Avengers: Age of Ultron Incredibles 2 Spider-Man: Far from Home Captain Marvel Jurassic Park Star Wars: Episode III - Revenge of the Sith Captain America: The Winter Soldier Star Wars: Episode II - Attack of the Clones The Incredibles Iron Man 2 Kong: Skull Island Django Unchained Kingsman: The Secret Service Captain America: The First Avenger Die Hard with a Vengeance The Legend of Tarzan xXx: Return of Xander Cage Miss Peregrine's Home for Peculiar Children Coming to America Turbo xXx Unbreakable Glass Jumper Pulp Fiction S.W.A.T. Patriot Games
1,Robert Downey Jr.,"$13,952,302,902",Avengers: Endgame Avengers: Infinity War The Avengers Avengers: Age of Ultron Iron Man 3 Captain America: Civil War Spider-Man: Homecoming Iron Man 2 Iron Man Sherlock Holmes: A Game of Shadows Sherlock Holmes Dolittle Due Date Tropic Thunder
2,Benedict Cumberbatch,"$13,508,189,548",Avengers: Endgame Avengers: Infinity War Spider-Man: No Way Home The Hobbit: The Battle of the Five Armies The Hobbit: The Desolation of Smaug Doctor Strange in the Multiverse of Madness Thor: Ragnarok Doctor Strange The Grinch Star Trek Into Darkness 1917 Penguins of Madagascar The Imitation Game 12 Years a Slave War Horse
3,Scarlett Johansson,"$13,473,169,463",Avengers: Endgame Avengers: Infinity War The Avengers Avengers: Age of Ultron Captain America: Civil War The Jungle Book Captain America: The Winter Soldier Sing Iron Man 2 Lucy Sing 2 Black Widow The Horse Whisperer He's Just Not That Into You
4,Alan Tudyk,"$12,357,426,928",Frozen II Frozen II Frozen Transformers: Dark of the Moon Rogue One: A Star Wars Story Zootopia Ice Age: Continental Drift Big Hero 6 Moana Ralph Breaks the Internet Wreck-It Ralph Ice Age Alvin and the Chipmunks: Chipwrecked Maze Runner: The Scorch Trials Lincoln Encanto Knocked Up
5,Idris Elba,"$11,767,250,635",Avengers: Infinity War Avengers: Age of Ultron Finding Dory Zootopia The Jungle Book Thor: Ragnarok Thor: Love and Thunder Fast & Furious Presents: Hobbs & Shaw Thor: The Dark World Thor Pacific Rim Prometheus Sonic the Hedgehog 2 Star Trek Beyond American Gangster
6,Chris Hemsworth,"$11,741,117,528",Avengers: Endgame Avengers: Infinity War The Avengers Avengers: Age of Ultron Thor: Ragnarok Thor: Love and Thunder Thor: The Dark World Thor Snow White and the Huntsman Star Trek Men in Black: International Ghostbusters
7,Zoe Saldaña,"$11,434,686,068",Avatar Avengers: Endgame Avengers: Infinity War Guardians of the Galaxy Vol. 2 Guardians of the Galaxy Pirates of the Caribbean: The Curse of the Black Pearl Star Trek Into Darkness Star Trek Star Trek Beyond The Terminal
8,Andy Serkis,"$11,305,428,948",Star Wars: Episode VII - The Force Awakens Black Panther Star Wars: Episode VIII - The Last Jedi The Lord of the Rings: The Return of the King The Lord of the Rings: The Two Towers The Lord of the Rings: The Fellowship of the Ring The Batman Dawn of the Planet of the Apes King Kong War for the Planet of the Apes Rise of the Planet of the Apes The Adventures of Tintin Flushed Away
9,Chris Evans,"$11,179,294,976",Avengers: Endgame Avengers: Infinity War The Avengers Avengers: Age of Ultron Captain America: Civil War Captain America: The Winter Soldier Captain America: The First Avenger Fantastic Four Knives Out Fantastic Four: Rise of the Silver Surfer Lightyear


## Main

In [None]:

# Same call as the executed cell earlier in the notebook; requires actors_map
# and actors_movies_map to already exist in the kernel.
print_actors_map_pd(actors_map, actors_movies_map)