In [1]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy import stats as st
from sqlalchemy import create_engine
from sqlalchemy import inspect, text

# Show every row when a DataFrame is displayed (None removes the row limit).
pd.options.display.max_rows = None

In [2]:
# Read both source CSV files; pd.read_csv returns each one as a DataFrame.
IMDB_route = "Resources/movies.csv"
imdb_db = pd.read_csv(filepath_or_buffer=IMDB_route)

stream_route = "Resources/Stream.csv"
stream_db = pd.read_csv(filepath_or_buffer=stream_route)

In [3]:
# Wrap the loaded streaming data in its own DataFrame object and preview it.
stream_df = pd.DataFrame(data=stream_db)

# First five rows (the default for head()).
stream_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,ID,Title,Year,Age,IMDb,Rotten Tomatoes,Netflix,Hulu,Prime Video,Disney+,Type,Directors,Genres,Country,Language,Runtime
0,0,1,Inception,2010,13+,8.8,87%,1,0,0,0,0,Christopher Nolan,"Action,Adventure,Sci-Fi,Thriller","United States,United Kingdom","English,Japanese,French",148.0
1,1,2,The Matrix,1999,18+,8.7,87%,1,0,0,0,0,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",United States,English,136.0
2,2,3,Avengers: Infinity War,2018,13+,8.5,84%,1,0,0,0,0,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",United States,English,149.0
3,3,4,Back to the Future,1985,7+,8.5,96%,1,0,0,0,0,Robert Zemeckis,"Adventure,Comedy,Sci-Fi",United States,English,116.0
4,4,5,"The Good, the Bad and the Ugly",1966,18+,8.8,97%,1,0,1,0,0,Sergio Leone,Western,"Italy,Spain,West Germany",Italian,161.0


In [4]:
# Build the IMDb DataFrame and rename its "title" column to "Title" so it
# shares a column name with stream_df.
imdb_df = pd.DataFrame(data=imdb_db).rename(columns={"title": "Title"})


In [5]:
# Inner-join the streaming and IMDb frames on their shared 'Title' column.
# NOTE(review): the join key is the title alone, so two different films that
# share a title would be paired with each other - confirm against the data.
merged_df = pd.merge(stream_df, imdb_df, how='inner', left_on='Title', right_on='Title')

# List every column name to decide which ones to keep.
print(list(merged_df.columns))

['Unnamed: 0', 'ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type', 'Directors', 'Genres', 'Country', 'Language', 'Runtime', 'imdb_title_id', 'original_title', 'year', 'date_published', 'genre', 'duration', 'country', 'language', 'director', 'writer', 'production_company', 'actors', 'description', 'avg_vote', 'votes', 'budget', 'usa_gross_income', 'worlwide_gross_income', 'metascore', 'reviews_from_users', 'reviews_from_critics']


In [6]:
# Lower-case every title so later lookups can compare against lower-cased input.
merged_df = merged_df.assign(Title=merged_df['Title'].str.lower())
merged_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,ID,Title,Year,Age,IMDb,Rotten Tomatoes,Netflix,Hulu,Prime Video,...,actors,description,avg_vote,votes,budget,usa_gross_income,worlwide_gross_income,metascore,reviews_from_users,reviews_from_critics
0,0,1,inception,2010,13+,8.8,87%,1,0,0,...,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen...",A thief who steals corporate secrets through t...,8.8,1892929,$ 160000000,$ 292576195,$ 829895144,74.0,3439.0,462.0
1,1,2,the matrix,1999,18+,8.7,87%,1,0,0,...,"Keanu Reeves, Laurence Fishburne, Carrie-Anne ...",A computer hacker learns from mysterious rebel...,8.7,1554261,$ 63000000,$ 171479930,$ 465343787,73.0,4069.0,337.0
2,2,3,avengers: infinity war,2018,13+,8.5,84%,1,0,0,...,"Robert Downey Jr., Chris Hemsworth, Mark Ruffa...",The Avengers and their allies must be willing ...,8.5,725138,$ 321000000,$ 678815482,$ 2048359754,68.0,4111.0,591.0
3,3,4,back to the future,1985,7+,8.5,96%,1,0,0,...,"Michael J. Fox, Christopher Lloyd, Lea Thompso...","Marty McFly, a 17-year-old high school student...",8.5,965471,$ 19000000,$ 211159762,$ 388355305,87.0,1050.0,233.0
4,5,6,spider-man: into the spider-verse,2018,7+,8.4,97%,1,0,0,...,"Shameik Moore, Jake Johnson, Hailee Steinfeld,...",Teen Miles Morales becomes Spider-Man of his r...,8.4,275653,$ 90000000,$ 190241310,$ 375540831,87.0,1653.0,379.0


In [7]:
# Split the merged frame into themed column subsets, then trim merged_df
# itself down to a summary set of columns.
ratings_cols = ['Title', 'IMDb', 'Rotten Tomatoes', 'metascore', 'reviews_from_users', 'reviews_from_critics']
economy_cols = ['Title', 'usa_gross_income', 'worlwide_gross_income', 'budget']
crew_cols = ['Title', 'director', 'writer', 'production_company', 'actors']
details_cols = ['Title', 'genre', 'duration', 'country', 'language', 'description', 'date_published']
streaming_cols = ['Title', 'Netflix', 'Hulu', 'Prime Video', 'Disney+']
summary_cols = ['Title', 'year', 'Age', 'IMDb', 'Rotten Tomatoes', 'metascore', 'description', 'Runtime',
                'Netflix', 'Hulu', 'Prime Video', 'Disney+']

ratings_df = merged_df.loc[:, ratings_cols]
economy_df = merged_df.loc[:, economy_cols]
crew_df = merged_df.loc[:, crew_cols]
details_df = merged_df.loc[:, details_cols]
streaming_df = merged_df.loc[:, streaming_cols]

# Reassigned last: every subset above is selected from the full merged frame.
merged_df = merged_df.loc[:, summary_cols]

In [8]:
# Give each subset short, lower-case column names.
ratings_names = {
    'Title': 'title',
    'IMDb': 'imdb',
    'Rotten Tomatoes': 'tomatoes',
    'reviews_from_users': 'user',
    'reviews_from_critics': 'critics',
}
economy_names = {'Title': 'title', 'usa_gross_income': 'usa', 'worlwide_gross_income': 'worldwide'}
crew_names = {'Title': 'title', 'production_company': 'company'}
details_names = {'Title': 'title', 'date_published': 'date'}
streaming_names = {
    'Title': 'title',
    'Netflix': 'netflix',
    'Hulu': 'hulu',
    'Prime Video': 'prime',
    'Disney+': 'disney',
}

ratings_df = ratings_df.rename(columns=ratings_names)
economy_df = economy_df.rename(columns=economy_names)
crew_df = crew_df.rename(columns=crew_names)
details_df = details_df.rename(columns=details_names)
streaming_df = streaming_df.rename(columns=streaming_names)

In [9]:
# Preview the first five rows of the details subset.
details_df.head(n=5)

Unnamed: 0,title,genre,duration,country,language,description,date
0,inception,"Action, Adventure, Sci-Fi",148,"USA, UK","English, Japanese, French",A thief who steals corporate secrets through t...,2010-07-16
1,the matrix,"Action, Sci-Fi",136,USA,English,A computer hacker learns from mysterious rebel...,1999-06-11
2,avengers: infinity war,"Action, Adventure, Sci-Fi",149,USA,English,The Avengers and their allies must be willing ...,2018-04-26
3,back to the future,"Adventure, Comedy, Sci-Fi",116,USA,English,"Marty McFly, a 17-year-old high school student...",1985-12-04
4,spider-man: into the spider-verse,"Animation, Action, Adventure",117,USA,"English, Spanish",Teen Miles Morales becomes Spider-Man of his r...,2018-12-12


In [10]:
# Rename columns so they can be queried in Postgres without wrapping each
# name in double quotes.
# NOTE(review): 'IMDb' keeps its mixed case and 'rotten tomatoes' contains a
# space, so those two presumably still need quoting in SQL - confirm.
summary_names = {
    'Title': 'title',
    'Age': 'age',
    'Rotten Tomatoes': 'rotten tomatoes',
    'Runtime': 'runtime',
    'Netflix': 'netflix',
    'Hulu': 'hulu',
    'Prime Video': 'prime',
    'Disney+': 'disney',
}
merged_df = merged_df.rename(columns=summary_names)

merged_df.head(n=1)

Unnamed: 0,title,year,age,IMDb,rotten tomatoes,metascore,description,runtime,netflix,hulu,prime,disney
0,inception,2010,13+,8.8,87%,74.0,A thief who steals corporate secrets through t...,148.0,1,0,0,0


In [14]:
# Create the connection to Postgres that the DataFrames are written to.
# The "username:password@host/database" part of the URL is read from the
# RDS_CONNECTION_STRING environment variable so that real credentials do not
# have to be saved in the notebook. When the variable is not set, the local
# default "postgres:postgres@localhost/postgres" is used; the target database
# must already exist (or the name must be changed to an existing one).
rds_connection_string = os.environ.get(
    "RDS_CONNECTION_STRING", "postgres:postgres@localhost/postgres"
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [17]:
# Check that the connection works by listing the tables in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector interface returns the same list of table names.
inspect(engine).get_table_names()

['merged_df',
 'economy',
 'crew',
 'details',
 'streaming',
 'movie_reviews',
 'ratings']

In [16]:
# Write merged_df and each themed subset to its own Postgres table.
# if_exists='replace' drops and recreates a table that is already present, so
# re-running the notebook refreshes the data instead of raising ValueError
# (to_sql defaults to if_exists='fail').
tables_to_write = {
    'movie_reviews': merged_df,
    'ratings': ratings_df,
    'economy': economy_df,
    'crew': crew_df,
    'details': details_df,
    'streaming': streaming_df,
}

for table_name, frame in tables_to_write.items():
    frame.to_sql(name=table_name, con=engine, index=False, if_exists='replace')

In [37]:
# Check that merged_df reached the database: read one value back from the
# movie_reviews table and confirm it comes back as a DataFrame.
pd.read_sql_query(
    sql="select hulu FROM movie_reviews where title='back to the future'",
    con=engine,
).head(n=5)

Unnamed: 0,hulu
0,0


In [38]:
# Read the IMDb rating back from the ratings table.
imdb_check = pd.read_sql_query("select imdb FROM ratings where title = 'back to the future'", con=engine)
imdb_check.head()

Unnamed: 0,imdb
0,8.5


In [45]:
# Total the netflix flag over the rows returned for the title.
netflix_rows = pd.read_sql_query(
    "select netflix FROM movie_reviews where title = 'back to the future'",
    con=engine,
)
netflix_rows["netflix"].sum()

1

In [40]:
# Total the hulu flag over the rows returned from the streaming table.
hulu_rows = pd.read_sql_query(
    "select hulu FROM streaming where title = 'back to the future'",
    con=engine,
)
hulu_rows["hulu"].sum()

0

In [41]:
# Read the actors column back from the crew table.
pd.read_sql_query(
    sql="select actors FROM crew where title = 'back to the future'",
    con=engine,
).head(n=5)

Unnamed: 0,actors
0,"Michael J. Fox, Christopher Lloyd, Lea Thompso..."


In [42]:
# Read the genre column back from the details table.
genre_check = pd.read_sql_query("select genre FROM details where title = 'back to the future'", con=engine)
genre_check.head()

Unnamed: 0,genre
0,"Adventure, Comedy, Sci-Fi"


In [43]:
# Read the worldwide gross back from the economy table.
pd.read_sql_query(
    sql="select worldwide FROM economy where title = 'back to the future'",
    con=engine,
).head(n=5)

Unnamed: 0,worldwide
0,$ 388355305


In [46]:
# Streaming-service columns of movie_reviews, in the order they are checked.
stream_service = ["hulu", "netflix", "prime", "disney"]
user_input = input("What movie are you trying to watch today?")
# Stored titles are lower-case, so normalise the search term the same way;
# surrounding whitespace is dropped so a stray space does not hide a match.
user_movie = user_input.strip().lower()

# Fetch every service flag for the title with a single query. The title is
# passed as a bound parameter rather than formatted into the SQL text, so
# quotes in the input (e.g. an apostrophe in a title) cannot break or alter
# the statement. The column list is built only from the constant list above.
service_columns = ", ".join(stream_service)
availability = pd.read_sql_query(
    text(f"select {service_columns} FROM movie_reviews where title = :title"),
    con=engine,
    params={"title": user_movie},
)

# First service whose flag is set in any matching row. "> 0" rather than
# "== 1" so a title stored in more than one row is still reported; an empty
# result sums to 0 for every service and falls through to "not found".
found_on = next(
    (service for service in stream_service if availability[service].sum() > 0),
    None,
)

if found_on is not None:
    print(f"{user_input} can be found on {found_on}")
else:
    print(f"{user_input} was not found on Netflix, Hulu, Amazon Prime or Disney+")

What movie are you trying to watch today?BaCK tO The FutURe
BaCK tO The FutURe can be found on netflix
