In [None]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
from numpy import mean
import json
import gzip
from subprocess import check_output
import math
import ast
%matplotlib inline

In [None]:
# Load the four TMDB workbooks; the first spreadsheet column is used as the
# index for each frame.
production_df, movie_df, genre_df, cast_df = (
    pd.read_excel(workbook_path, index_col=0)
    for workbook_path in (
        '../data/tmdb_production_data.xlsx',
        '../data/tmdb_movie_data.xlsx',
        '../data/movie_genres_data.xlsx',
        '../data/tmdb_cast_data.xlsx',
    )
)

In [None]:
# NOTE(review): despite the "director" naming used throughout this notebook,
# the filter keeps cast members whose known-for department is 'Acting'; the
# final charts label these people as actors.
acting_mask = cast_df['cast_known_for_department'].eq('Acting')
director_df = cast_df[acting_mask]
director_df

In [None]:
# One row per cast member, carrying the list of movie ids they appear in.
# The name column is relabelled 'Director' because later cells expect it.
director_movie = (
    director_df
    .groupby('cast_name')['movie_id']
    .agg(list)
    .reset_index()
    .rename(columns={'cast_name': 'Director'})
)
director_movie

In [None]:
# Title and profit columns from the production table. This selection used to
# be repeated verbatim in two consecutive cells; one copy is enough.
movie_profits = production_df[['movie_title', 'movie_profits']]
movie_profits

In [None]:
# Lookup table pairing every movie id with its title.
movie_ids = movie_df.loc[:, ['movie_id', 'movie_title']]
movie_ids

In [None]:
# Attach movie ids to profits via the title (the only column the two frames
# share), keep rows whose profit is not exactly 0, then drop the title.
# NOTE(review): joining on title will cross-match movies that share a title,
# and rows with a missing profit pass the `!= 0` filter -- confirm that both
# are acceptable for this data.
profit_ids = (
    movie_profits
    .merge(movie_ids, on='movie_title')
    .loc[lambda frame: frame['movie_profits'].ne(0)]
    .drop(columns=['movie_title'])
)
profit_ids

In [None]:
# Join profits with the genre flags (pandas merges on whatever columns the
# two frames have in common) and keep the rows flagged as Animation.
profit_genre = pd.merge(profit_ids, genre_df)

is_animation = profit_genre['Animation'].eq(1)
animation_movies = profit_genre.loc[is_animation]
animation_movies

In [None]:
# The genre indicator columns are no longer needed once the Animation subset
# has been selected.
animation_movies = animation_movies.drop(
    columns=[
        'Action', 'Adventure', 'Animation', 'Comedy', 'Crime', 'Drama',
        'Family', 'Fantasy', 'History', 'Horror', 'Music', 'Mystery',
        'Romance', 'Science Fiction', 'TV Movie', 'Thriller', 'War', 'Western',
    ]
)
animation_movies

In [None]:
# Build one row per (animation movie, cast member) pair.
#
# Invert `director_movie` once into a movie_id -> [names] index instead of
# re-scanning every person's id list for every movie. Names are appended in
# `director_movie` row order, and `set(ids)` ensures a person is listed once
# per movie even if the id occurs several times in their list.
people_by_movie = {}
for person, ids in zip(director_movie['Director'], director_movie['movie_id']):
    for credited_id in set(ids):
        people_by_movie.setdefault(credited_id, []).append(person)

repeated_rows = [
    {'movie_id': movie_id, 'movie_profits': profits, 'Director': director}
    for movie_id, profits in zip(animation_movies['movie_id'], animation_movies['movie_profits'])
    for director in people_by_movie.get(movie_id, [])
]

# Passing `columns` keeps the expected schema even when no pair was found,
# so the cells that drop / group by these columns do not fail on a frame
# that has no columns at all.
animation_movies_repeated = pd.DataFrame(
    repeated_rows, columns=['movie_id', 'movie_profits', 'Director']
)

In [None]:
# Display the frame of (movie_id, movie_profits, Director) rows for inspection.
animation_movies_repeated

In [None]:
# The movie id is not needed for the per-person aggregation that follows.
# This cell overwrites its own input, so `errors='ignore'` keeps it safe to
# re-run: a second execution is a no-op instead of raising KeyError.
animation_movies_repeated = animation_movies_repeated.drop(columns=['movie_id'], errors='ignore')

In [None]:
# Number of rows (i.e. animation movies) per person, as a two-column frame:
# 'Director' and 'Movies Directed'.
movies_directed = (
    animation_movies_repeated
    .groupby('Director')
    .size()
    .reset_index(name='Movies Directed')
)
movies_directed

In [None]:
# Average profit per person, expressed in whole millions.
# NOTE(review): this cell replaces `animation_movies_repeated` with its own
# aggregate, so running it twice divides by one million a second time --
# re-run the cells that build the frame before re-running this one.
animation_movies_repeated = (
    animation_movies_repeated
    .groupby('Director')
    .mean()
    .reset_index()
    .assign(movie_profits=lambda frame: (frame['movie_profits'] / 1000000).round(0))
)
animation_movies_repeated

In [None]:
# Combine average profit and movie count per person; 'Director' is the only
# column the two frames share, so it is the join key.
director_data = pd.merge(animation_movies_repeated, movies_directed, on='Director')
director_data

In [None]:
# Preview only (result is not stored): people with the most movies first.
director_data.sort_values(by='Movies Directed', ascending=False)

In [None]:
# Keep people with more than 5 movies, take the 10 highest average profits,
# then order those ten from lowest to highest for plotting.
director_data = (
    director_data
    .loc[lambda frame: frame['Movies Directed'].gt(5)]
    .sort_values(by='movie_profits', ascending=False)
    .head(10)
    .sort_values(by='movie_profits', ascending=True)
)
director_data

In [None]:
# Presentation copy of the top-ten table with chart-friendly column labels;
# `rename` returns a new frame, so `director_data` itself is left untouched.
graph_data = director_data.rename(
    columns={
        'Director': 'Actor',
        'movie_profits': 'Average Movie Profit (Millions)',
        'Movies Directed': 'Movies Starred In',
    }
)
graph_data

In [None]:
# Bar heights as plain Python ints, used for the value labels on the chart.
means = list(map(int, graph_data['Average Movie Profit (Millions)']))
means

In [None]:
# Actor names in the same order as the rows of `graph_data`.
actors = list(graph_data['Actor'])
actors

In [None]:
# Bar chart: average movie profit (in millions) for each actor in graph_data.
#
# Apply the themes BEFORE creating the figure. A figure picks up rcParams
# (such as its face colour) at creation time, so creating it first means a
# fresh kernel renders with the previous style until the cell is run again.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
fig, ax = plt.subplots(figsize=(25, 11))

palette = {'Teresa Ganzel':'#33C1DD', 'John Ratzenberger':'#D13F3F','Colette Whitaker':'#9BF090','Jackie Gonneau':'#D6AAD7',
        'Tom Hanks':'#F1E441','Teddy Newton':'#BB77B8','Jim Ward':'#9A8154','Bonnie Hunt':'#84EA08','Lori Alan':'#FDADA5','Bob Peterson':'#A36565'}

# Resolve one colour per plotted actor, in row order, with a neutral grey
# fallback: a dict palette has to contain every plotted name, so a change in
# the upstream top-ten list would otherwise break this cell.
bar_colors = [palette.get(actor, '#CCCCCC') for actor in graph_data['Actor']]

bar = sns.barplot(x='Actor', y='Average Movie Profit (Millions)', palette=bar_colors, data=graph_data, ci=False, ax=ax);
ax.set_title('Average Animation Movie Profit by Actor')

# Write each bar's value just above it; 9.5 is the vertical offset in data
# units (millions).
for position, value in enumerate(means):
    ax.text(position, value + 9.5, str(value),
            fontdict = dict(color='white', fontsize=18),
            horizontalalignment = 'center')

In [None]:
# Movie counts as plain Python ints, used for the value labels on the chart.
means2 = list(map(int, graph_data['Movies Starred In']))
means2

In [None]:
# Bar chart: number of animation movies for each actor in graph_data.
#
# Apply the themes BEFORE creating the figure. A figure picks up rcParams
# (such as its face colour) at creation time, so creating it first means a
# fresh kernel renders with the previous style until the cell is run again.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
fig, ax = plt.subplots(figsize=(25, 11))

palette = {'Teresa Ganzel':'#33C1DD', 'John Ratzenberger':'#D13F3F','Colette Whitaker':'#9BF090','Jackie Gonneau':'#D6AAD7',
        'Tom Hanks':'#F1E441','Teddy Newton':'#BB77B8','Jim Ward':'#9A8154','Bonnie Hunt':'#84EA08','Lori Alan':'#FDADA5','Bob Peterson':'#A36565'}

# Resolve one colour per plotted actor, in row order, with a neutral grey
# fallback: a dict palette has to contain every plotted name, so a change in
# the upstream top-ten list would otherwise break this cell.
bar_colors = [palette.get(actor, '#CCCCCC') for actor in graph_data['Actor']]

bar = sns.barplot(x='Actor', y='Movies Starred In', palette=bar_colors, data=graph_data, ci=False, ax=ax);
ax.set_title('Animation Movies Starred In by Actor')

# Write each bar's value just above it; .25 is the vertical offset in data
# units (movies).
for position, value in enumerate(means2):
    ax.text(position, value + .25, str(value),
            fontdict = dict(color='white', fontsize=18),
            horizontalalignment = 'center')