In [1]:
#IMPORT DEPENDENCIES
import os
import sys, requests
import pandas as pd
from pprint import pprint

# Location of the Netflix titles CSV, relative to the notebook's working directory
data_table = "./Resources/netflix_titles.csv"

# Load the CSV into a pandas DataFrame
netflix_df = pd.read_csv(filepath_or_buffer=data_table)


# The user will have to use their own API key when running this code. The keys are read
# from a separate api_keys.py module; set the API_KEYS_DIR environment variable to the
# folder that holds your copy. When the variable is unset, the original author's folder
# is used so existing setups keep working.
sys.path.insert(1, os.environ.get('API_KEYS_DIR', '/Users/symbioticpenguin/Documents'))
from api_keys import comdb, tomdb, tiania_api, tiania_OMDB, jomdb


# limiting to 1000 entries because free api calls only allow 1000 :(
MAX_API_CALLS = 1000

# Filtering for movies whose country is exactly "United States".
# Both masks are built from netflix_df and applied to netflix_df, so the boolean
# key and the frame being indexed share the same index (no implicit re-alignment
# of a mask that was computed on a differently-filtered frame).
is_movie = netflix_df["type"] == "Movie"
is_united_states = netflix_df["country"] == "United States"
filtered_flix = netflix_df[is_movie & is_united_states]

# Dropping all rows that contain a NaN in any column
filtered_flix = filtered_flix.dropna()

# Keep only the first MAX_API_CALLS rows so the lookups stay within the API quota
filtered_flix = filtered_flix.head(MAX_API_CALLS)

# LeTs TaKe A lOoK aT tHe DaTa
filtered_flix.head()



Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...
20,80060297,Movie,Manhattan Romance,Tom O'Brien,"Tom O'Brien, Katherine Waterston, Caitlin Fitz...",United States,"September 8, 2017",2014,TV-14,98 min,"Comedies, Independent Movies, Romantic Movies",A filmmaker working on a documentary about lov...
23,70304988,Movie,Stonehearst Asylum,Brad Anderson,"Kate Beckinsale, Jim Sturgess, David Thewlis, ...",United States,"September 8, 2017",2014,PG-13,113 min,"Horror Movies, Thrillers","In 1899, a young doctor arrives at an asylum f..."
24,80057700,Movie,The Runner,Austin Stark,"Nicolas Cage, Sarah Paulson, Connie Nielsen, W...",United States,"September 8, 2017",2015,R,90 min,"Dramas, Independent Movies",A New Orleans politician finds his idealistic ...
25,80045922,Movie,6 Years,Hannah Fidell,"Taissa Farmiga, Ben Rosenfield, Lindsay Burdge...",United States,"September 8, 2015",2015,NR,80 min,"Dramas, Independent Movies, Romantic Movies",As a volatile young couple who have been toget...


In [2]:
# This is the test code we used before calling the API 1,000 times!

# movie = "t=" + "Finding Nemo"
# key = "&apikey="+comdb
# url = 'http://www.omdbapi.com/?' + movie +  key
# response = requests.get(url).json()

In [3]:
# This cell contains the for loop to get all the data for movies in netflix in the omdb api

# OMDb endpoint and the number of seconds to wait for each response before giving up
OMDB_URL = 'http://www.omdbapi.com/'
OMDB_TIMEOUT_SECONDS = 10

# Creating the list of movie titles to make the API calls on
filtered_movies = filtered_flix['title']

# initializing empty lists to append to in the loop
metascore = []
imdbrating = []
imdbvotes = []
boxoffice = []
title = []

# Titles whose lookup failed or came back without the expected fields
skipped_titles = []

# This is the api call for our OMDB dataframe
for item in filtered_movies:

    try:
        # Let requests build the query string so the title is percent-encoded.
        # Concatenating it by hand breaks on titles such as "#realityhigh":
        # everything after '#' is treated as a URL fragment and never sent.
        response = requests.get(
            OMDB_URL,
            params={'t': item, 'apikey': jomdb},
            timeout=OMDB_TIMEOUT_SECONDS,
        ).json()

        # Read every field before appending anything, so a missing key skips
        # the whole movie and the five lists always stay the same length.
        fields = (
            response['BoxOffice'],
            response['Title'],
            response['Metascore'],
            response['imdbRating'],
            response['imdbVotes'],
        )
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a response lacking the expected
        # fields: remember the title and move on to the next one.
        skipped_titles.append(item)
        continue

    # Append the items we care about from the json
    boxoffice.append(fields[0])
    title.append(fields[1])
    metascore.append(fields[2])
    imdbrating.append(fields[3])
    imdbvotes.append(fields[4])

In [4]:
# Assemble the collected OMDB lists into a DataFrame, one column per list
omdb_df = pd.DataFrame(
    dict(
        title=title,
        metascore=metascore,
        imdb_rating=imdbrating,
        imdb_votes=imdbvotes,
        box_office=boxoffice,
    )
)

In [5]:
# Joining the two tables: the netflix table and the OMDB table.
# Nothing guarantees the OMDB titles are unique (two lookups can yield the same
# 'title'), and a repeated key on the right-hand side of a left join emits one
# output row per match, duplicating netflix rows. Keep only the first OMDB
# record for each title before using it as the join index.
final_table = filtered_flix.join(
    omdb_df.drop_duplicates(subset='title').set_index('title'),
    on='title',
    how='left',
)

In [6]:
# Discard every row that has a missing (NaN) value in at least one column.
# NOTE(review): OMDb appears to report absent values as the string "N/A", which
# dropna would not remove - confirm whether those rows should be filtered too.
final_table = final_table.dropna(axis=0, how='any')

# Save the result to disk for later use: comma-separated, header row, no index column
final_table.to_csv(path_or_buf='final_table', sep=',', header=True, index=False)