In [None]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
from numpy import mean
import json
import gzip
from subprocess import check_output
import math
import ast
from matplotlib.lines import Line2D
%matplotlib inline

In [None]:
# Load the three TMDB workbooks; column 0 of each sheet becomes the row index.
production_df, movie_df, genre_df = [
    pd.read_excel(workbook_path, index_col=0)
    for workbook_path in (
        '../data/tmdb_production_data.xlsx',
        '../data/tmdb_movie_data.xlsx',
        '../data/movie_genres_data.xlsx',
    )
]

In [None]:
# Narrow the production table to the title plus its three money columns.
movie_money = production_df.loc[
    :, ['movie_title', 'movie_budget', 'movie_revenue', 'movie_profits']
]
movie_money

In [None]:
# Keep only titles whose budget AND revenue are both strictly positive, then retain
# just the title and its profit.
# NOTE(review): presumably a zero marks an unreported figure — confirm against the data source.
movie_profits = (
    movie_money
    .loc[lambda df: (df['movie_budget'] > 0) & (df['movie_revenue'] > 0)]
    .drop(columns=['movie_budget', 'movie_revenue'])
)
movie_profits

In [None]:
# Title -> release year lookup used to attach a year to the financial rows.
all_yearly_releases = movie_df.loc[:, ['movie_title', 'movie_release_year']]
all_yearly_releases

In [None]:
# Total profit per release year, expressed in billions.
# The merge joins on the only shared column, 'movie_title'.
# NOTE(review): duplicate titles on either side would multiply rows in this join —
# confirm titles are unique before trusting the sums.
yearly_profit_releases = (
    movie_profits
    .merge(all_yearly_releases)
    .drop(columns=['movie_title'])
    .groupby('movie_release_year')
    .sum()
    .reset_index()
    .assign(movie_profits=lambda df: df['movie_profits'] / 1000000000)
    .rename(columns={
        'movie_release_year': 'Year Released',
        'movie_profits': 'Sum Profits (Billions)',
    })
)
yearly_profit_releases

In [None]:
# Apply the theme BEFORE creating the figure: a figure takes its face colour from
# rcParams at creation time, so styling afterwards leaves the first figure of a fresh
# kernel with a light background behind the dark theme's white labels.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
plt.figure(figsize=(14,11))

# Summed profit (billions) per release year.
# NOTE(review): newer seaborn releases deprecate `ci` in favour of `errorbar=None`;
# confirm the installed version before switching.
sns.lineplot(data=yearly_profit_releases, x="Year Released", y="Sum Profits (Billions)", ci=False);

In [None]:
# Number of titles released in each year (count of non-null titles per year).
yearly_releases = (
    movie_df[['movie_title', 'movie_release_year']]
    .groupby('movie_release_year')
    .count()
    .reset_index()
    .rename(columns={
        'movie_release_year': 'Year Released',
        'movie_title': 'Number of Movies Released',
    })
)
yearly_releases

In [None]:
# Theme first, figure second: the figure's face colour is fixed from rcParams when it
# is created, so this cell renders with the dark background even when run on its own.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
plt.figure(figsize=(14,11))

# Count of releases per year.
sns.lineplot(data=yearly_releases, x="Year Released", y="Number of Movies Released", ci=False);

In [None]:
# Left join keeps every year that has releases; years without profit data get NaN.
# Indexed by year so each column can be plotted directly as a Series.
merged_yearly_releases = (
    yearly_releases
    .merge(yearly_profit_releases, how='left')
    .set_index('Year Released')
)
merged_yearly_releases

In [None]:
# Theme first, figure second: the figure's face colour is fixed from rcParams when it
# is created, so this cell renders with the dark background even when run on its own.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
plt.figure(figsize=(14,11))

# Release counts on the primary y-axis; summed profits on a twin y-axis sharing the year axis.
g = sns.lineplot(data=merged_yearly_releases['Number of Movies Released'], color="yellow")
sns.lineplot(data=merged_yearly_releases['Sum Profits (Billions)'], color="g", ax=g.axes.twinx())
# The two lines live on different axes, so the legend is built from hand-made proxy handles.
g.legend(handles=[Line2D([], [], marker='_', color="yellow", label='Number of Movies Released'), Line2D([], [], marker='_', color="g", label='Sum Profits')]);

## Profit over last 5 years

In [None]:
# Title with its release year and month, for the month-level breakdown.
year_month_releases = movie_df.loc[
    :, ['movie_title', 'movie_release_year', 'movie_release_month']
]
year_month_releases

In [None]:
# Releases strictly after 2017, i.e. 2018 onwards.
last_five_years = year_month_releases.loc[
    lambda df: df['movie_release_year'].gt(2017)
]
last_five_years

In [None]:
# Sum profits per (month, year) for the recent releases.
# The left join (on the shared 'movie_title' column) keeps releases that have no profit
# figure; their NaN profit is skipped by the sum.
five_year_profits = (
    last_five_years
    .merge(movie_profits, how='left')
    .drop(columns=['movie_title'])
    .groupby(['movie_release_month', 'movie_release_year'])
    .sum()
    .reset_index()
)
# Year as "YYYY" and month as zero-padded "MM" strings, ready to be concatenated into
# a date. The 'date' column itself is derived in the following cell, once these
# string columns exist.
five_year_profits['movie_release_year'] = five_year_profits['movie_release_year'].astype(int).astype(str)
five_year_profits['movie_release_month'] = five_year_profits['movie_release_month'].astype(int).astype(str).str.zfill(2)
five_year_profits.head(5)

In [None]:
# First-of-month timestamp assembled from the "YYYY" and "MM" string columns.
five_year_profits = five_year_profits.assign(
    date=lambda df: pd.to_datetime(
        df['movie_release_year'] + '-' + df['movie_release_month'] + '-01'
    )
)
five_year_profits.head()

In [None]:
# Express profits in billions and switch to presentation-friendly column names.
five_year_profits = (
    five_year_profits
    .assign(movie_profits=lambda df: df['movie_profits'] / 1000000000)
    .rename(columns={
        'movie_release_year': 'Year Released',
        'movie_profits': 'Sum Profits (Billions)',
        'movie_release_month': 'Month Released',
        'date': 'Date of Release',
    })
)
five_year_profits.head()

In [None]:
# Theme first, figure second: the figure's face colour is fixed from rcParams when it
# is created, so this cell renders with the dark background even when run on its own.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
plt.figure(figsize=(14,11))

# Summed profit (billions) per release month since 2018.
sns.lineplot(data=five_year_profits, x="Date of Release", y="Sum Profits (Billions)", ci=False);

## Budget over time

In [None]:
# Same positive-budget / positive-revenue filter as the profit table, but keeping the
# budget column instead of the profit column.
movie_budget = (
    movie_money
    .loc[lambda df: df['movie_budget'].gt(0) & df['movie_revenue'].gt(0)]
    .drop(columns=['movie_profits', 'movie_revenue'])
)
movie_budget

In [None]:
# Total budget per release year, expressed in billions.
# The merge joins on the only shared column, 'movie_title'.
# NOTE(review): duplicate titles on either side would multiply rows in this join —
# confirm titles are unique before trusting the sums.
yearly_budget_releases = (
    movie_budget
    .merge(all_yearly_releases)
    .drop(columns=['movie_title'])
    .groupby('movie_release_year')
    .sum()
    .reset_index()
    .assign(movie_budget=lambda df: df['movie_budget'] / 1000000000)
    .rename(columns={
        'movie_release_year': 'Year Released',
        'movie_budget': 'Sum of Budgets (Billions)',
    })
)
yearly_budget_releases

In [None]:
# Theme first, figure second: the figure's face colour is fixed from rcParams when it
# is created, so this cell renders with the dark background even when run on its own.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
plt.figure(figsize=(14,11))

# Summed budget (billions) per release year.
sns.lineplot(data=yearly_budget_releases, x="Year Released", y="Sum of Budgets (Billions)", ci=False);

In [None]:
# Budgets and profits side by side, restricted to years present in both tables
# (inner join on the shared 'Year Released' column), indexed by year.
merged_yearly_spending = pd.merge(
    yearly_budget_releases,
    yearly_profit_releases,
    how='inner',
).set_index('Year Released')
merged_yearly_spending

In [None]:
# Budget-only view of the merged table, with the year restored as a regular column.
yearly_budget = (
    merged_yearly_spending
    .drop(columns=['Sum Profits (Billions)'])
    .reset_index()
)
yearly_budget

In [None]:
# Profit-only view of the merged table, with the year restored as a regular column.
yearly_profits = (
    merged_yearly_spending
    .drop(columns=['Sum of Budgets (Billions)'])
    .reset_index()
)
yearly_profits

In [None]:
# Theme first, figure second: the figure's face colour is fixed from rcParams when it
# is created, so this cell renders with the dark background even when run on its own.
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
plt.figure(figsize=(14,11))

# Budgets (red) and profits (green) share one y-axis, both in billions.
g = sns.lineplot(data=yearly_budget, x='Year Released', y='Sum of Budgets (Billions)', color='r')
sns.lineplot(data=yearly_profits, x='Year Released', y='Sum Profits (Billions)', color='g', ax=g)

# Replace the per-series y-label with one that fits both lines.
plt.ylabel('USD $ (Billions)')
# Hand-built proxy handles give the legend one entry per series.
g.legend(handles=[Line2D([], [], marker='_', color="red", label='Sum of Budgets'), Line2D([], [], marker='_', color="g", label='Sum Profits')]);