# ETL - LOAD

In [28]:
%matplotlib inline

In [29]:
# import dependencies
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, inspect

from config import DATABASE_URI

In [30]:
# Create connection
# `engine` is the object passed as `con` to the to_sql / read_sql_query calls in
# this notebook; `conn` is an explicit connection that is closed in the last cell.
# NOTE(review): the saved output of this cell is
# "ValueError: invalid literal for int() with base 10: 'postgres:5432'",
# which suggests the host/port part of DATABASE_URI is malformed — verify config.
engine = create_engine(DATABASE_URI)
conn = engine.connect()

ValueError: invalid literal for int() with base 10: 'postgres:5432'

In [None]:
# Import csv files to load in database and inspect
# FinalMoviesBudget: read the CSV (path is relative to the working directory)
# and preview the first rows
FinalMoviesBudget = pd.read_csv('FinalMoviesBudget.csv')
FinalMoviesBudget.head()

In [None]:
# Parse the ReleaseDate column into pandas datetime values
FinalMoviesBudget = FinalMoviesBudget.assign(
    ReleaseDate=pd.to_datetime(FinalMoviesBudget['ReleaseDate'])
)

In [None]:
# Lower-case every column name to avoid errors while loading into the database
FinalMoviesBudget = FinalMoviesBudget.rename(columns=str.lower)

In [None]:
# netflix_movies_revenue: read the CSV (path is relative to the working
# directory) and preview the first rows
netflix_movies_revenue = pd.read_csv('netflix_movies_revenue.csv')
netflix_movies_revenue.head()

In [None]:
# inspect data types: .info() prints each column's dtype and non-null count
netflix_movies_revenue.info()

In [None]:
# Check which tables currently exist in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is the supported way to list tables.
inspect(engine).get_table_names()

In [None]:
# Load finalmoviesbudget to the database
# index=False keeps the DataFrame index out of the table.
# NOTE(review): if_exists='append' inserts the rows again every time this cell
# is re-run — confirm the table is emptied first if duplicates are not wanted.
FinalMoviesBudget.to_sql(name = 'finalmoviesbudget', con = engine, if_exists = 'append', index = False)

In [None]:
# Load netflix_movies_revenue to the database
# index=False keeps the DataFrame index out of the table.
# NOTE(review): if_exists='append' inserts the rows again every time this cell
# is re-run — confirm the table is emptied first if duplicates are not wanted.
netflix_movies_revenue.to_sql(name = 'netflix_movies_revenue', con = engine, if_exists = 'append', index = False)

In [None]:
# Query moviesbudget_db to confirm tables are loaded
# finalmoviesbudget: LIMIT makes the database return only the preview rows
# instead of transferring the whole table just to display the first five.
pd.read_sql_query('SELECT * FROM finalmoviesbudget LIMIT 5', con = engine)


In [None]:
# Query moviesbudget_db to confirm tables are loaded
# netflix_movies_revenue: LIMIT makes the database return only the preview rows
# instead of transferring the whole table just to display the first five.
pd.read_sql_query('SELECT * FROM netflix_movies_revenue LIMIT 5', con = engine)

In [None]:
# Budget Analysis
# Pull the complete finalmoviesbudget table back out of the database
budget = pd.read_sql_query(sql='SELECT * FROM finalmoviesbudget', con=engine)

In [None]:
# The ten rows with the largest productionbudget, biggest first
top_10_budget = (
    budget
    .sort_values(by='productionbudget', ascending=False)
    .iloc[:10]
)
top_10_budget

In [None]:
# Show only title, productionbudget and worldwidegross for the top 10
top_10_budget.loc[:, ['title', 'productionbudget', 'worldwidegross']]

In [None]:
# Bar chart of top 10 budget
# Create the figure explicitly so the same object is drawn on, saved and shown.
fig, ax = plt.subplots(figsize=(15, 10))
# x='title' binds each bar group to its own movie title instead of relabelling
# the ticks by position afterwards.
top_10_budget.plot(
    x='title',
    y=['productionbudget', 'worldwidegross'],
    kind='bar',
    ax=ax,
    title="Movies Budget and Gross Revenue",
    legend=True,
    fontsize=12,
    rot=90,
)
ax.set_xlabel("Movies", fontsize=12)
ax.set_ylabel("Amount ($)", fontsize=16)
# bbox_inches='tight' expands the saved area so the vertical movie titles are
# not cut off at the bottom edge of the PNG.
fig.savefig('budget_gross_top10.png', bbox_inches='tight')
plt.show()

In [None]:
# Netflix top 10 budget movies
# Step 1: read netflix_movies_revenue back from the database
netflix_titles = pd.read_sql_query(sql='SELECT * FROM netflix_movies_revenue', con=engine)

In [None]:
# Keep only the id column. Double brackets return a one-column DataFrame rather
# than a Series, so the object matches its `_df` name and is a frame when it is
# merged on 'id'.
netflix_title_df = netflix_titles[['id']]
netflix_title_df.head()

In [None]:
# Inner-join netflix_title_df with the budget table on their shared id column
NetflixBudgetMerged = pd.merge(
    left=netflix_title_df,
    right=budget,
    how='inner',
    on='id',
)
NetflixBudgetMerged.head()

In [None]:
# Inspect the merged frame: .info() prints each column's dtype and non-null count
NetflixBudgetMerged.info()

In [None]:
# The ten merged rows with the largest productionbudget, biggest first
NetflixBudgetMerged_top10 = (
    NetflixBudgetMerged
    .sort_values(by='productionbudget', ascending=False)
    .iloc[:10]
)
NetflixBudgetMerged_top10

In [None]:
# Titles of the Netflix top 10, kept as a one-column DataFrame
NetflixBudgetMerged_top10.loc[:, ['title']]

In [None]:
# Plot netflix top 10: production budget next to worldwide gross per movie
# Create the figure explicitly so the same object is drawn on, saved and shown.
fig, ax = plt.subplots(figsize=(15, 10))
# x='title' binds each bar group to its own movie title instead of relabelling
# the ticks by position afterwards.
NetflixBudgetMerged_top10.plot(
    x='title',
    y=['productionbudget', 'worldwidegross'],
    kind='bar',
    ax=ax,
    title="Netflix Budget and Gross Revenue",
    legend=True,
    fontsize=12,
    rot=90,
)
ax.set_xlabel("Movies", fontsize=12)
ax.set_ylabel("Amount ($)", fontsize=16)
# bbox_inches='tight' expands the saved area so the vertical movie titles are
# not cut off at the bottom edge of the PNG.
fig.savefig('netflixbudget_gross_top10.png', bbox_inches='tight')
plt.show()

In [None]:
# Close the explicit connection, then dispose of the engine so the pooled
# connections used by the to_sql / read_sql_query calls are released as well.
conn.close()
engine.dispose()