# TMDB Helper Development

Workflow: 0  

Goal: To develop the TMDB helper class by exploring the functionality of the ```tmdbsimple``` package.   

Result: The ```moviedb_helper.py``` file is created.  

Notes: The rate limit is stated as 40 requests per 10 seconds, i.e. at most one request every 0.25 seconds. Therefore a pause of 0.5 seconds between requests is enough.  

In [1]:
import os
import time
import random as RD
import pandas as PD

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [3]:
# can't reload script in a different folder

import sys
sys.path.append('../models')
import moviedb_helper as MH 

In [4]:
config = MH.MB.Configuration()
config.base_uri
config.headers
#config.info()

'https://api.themoviedb.org/3'

{'Content-Type': 'application/json',
 'Accept': 'application/json',
 'Connection': 'close'}

## Get Data For One Movie 

In [5]:
URL_FILE = '../data/reelgood_url.csv'
url_df = PD.read_csv(URL_FILE)
url_df.head()
url_df.shape

Unnamed: 0,title,year,reelgood_id
0,1,2013,1-2013
1,"10,000 BC",2008,10000-bc-2008
2,1000 Rupee Note,2016,1000-rupee-note-2016
3,1000 To 1,2014,1000-to-1-2014
4,$100 and a T-Shirt: A Documentary About Zines ...,2004,100-and-a-tshirt-a-documentary-about-zines-in-...


(18496, 3)

In [6]:
# get sample movie
# random.randint includes BOTH endpoints, so randint(1, n) could return n
# (out of bounds for iloc) and could never select row 0; randrange(n)
# draws uniformly from the valid positions 0..n-1 instead.

sample_idx = RD.randrange(url_df.shape[0])
sample_idx
url_dx = url_df.iloc[sample_idx].to_dict()
url_dx

14355

{'title': 'The City Is Mine',
 'year': 2008,
 'reelgood_id': 'the-city-is-mine-2008'}

In [7]:
client = MH.MovieDBHelper()

tmdb_id = client.get_movie_id(url_dx['title'], url_dx['year'])
movie_dx = client.get_movie_by_id(tmdb_id)
movie_dx

{'title': 'The City Is Mine',
 'original_title': 'The City Is Mine',
 'year': '2008',
 'companies': None,
 'country': 'United States of America',
 'language': 'English',
 'run_time': 105,
 'crew': 'Patrick Pierre, Adriana Gittens',
 'cast': None,
 'poster': '/9KFuCTAZ4H6x9zsUheZJKaP0PPn.jpg',
 'genres': 'Action, Drama, Thriller',
 'collection': None,
 'synopsis': 'A young man coming home after serving time in the penitentiary finds his city in control by local thugs, stops at nothing to gain sole control of a city he feels is rightfully his.',
 'budget': None,
 'gross': None,
 'score': 5.3,
 'votes': 2,
 'tmdb_id': 51922,
 'imdb_id': 'tt1280504'}

In [16]:
# investigate errors from bulk search
# won't be using search anymore, will use discover instead? (still need to match discovered results)

title = 'film about Veterans'.lower()
year = '2018'
tmdb_id = client.get_movie_id(title, year)
movie_dx = client.get_movie_by_id(tmdb_id)
movie_dx

Exception: No movie found: film about veterans (2018).

### Explore Discover()

In [None]:
# look up a single movie by id (method name restored; it had been split by a stray space)
client.get_movie_by_id(449924)

In [None]:
# Time one discover_movies('2015') call and report how many movies came back.
client = MH.MovieDBHelper()

# perf_counter is monotonic, so the measured interval cannot be skewed by
# system clock adjustments while the (long) call is running
t0 = time.perf_counter()
movie_ls = client.discover_movies('2015')

t1 = time.perf_counter()
print(f'movies: {len(movie_ls)}')
print(f'time: {(t1-t0)/60:.2f} mins')

# takes 1+ hrs for one quarter of a year

In [None]:
movie_df = PD.DataFrame(movie_ls)
movie_df.head()
movie_df.info()

In [None]:
# NOTE(review): `discover_results` is not assigned in any cell shown in this
# notebook export; presumably it is a raw discover response with a 'results'
# list — TODO confirm where it is produced before running this cell.
PD.DataFrame(discover_results['results'])

### Explore Movies() 

In [None]:
# Resolve a known title/year to an id, then build the Movies resource that
# the exploration cells below call into.
# (method name restored; it had been split by a stray space)
#movie_id = client.get_movie_id(movie_dx['title'], movie_dx['year'])
movie_id = client.get_movie_id("Out of the Blue", 2002)

# MH.MB is presumably the tmdbsimple module re-exported by moviedb_helper — TODO confirm
movie_res = MH.MB.Movies(movie_id)
#dir(movie_res)

In [None]:
movie_res.base_uri

In [None]:
#PD.DataFrame(movie_res.changes()['changes'][0]['items'])

In [None]:
# credits 

cast = movie_res.credits()['cast']
cast
cast_df = PD.DataFrame(cast)
cast_df.head()

In [None]:
crew = movie_res.credits()['crew']
crew_df = PD.DataFrame(crew)
crew_df.head()

In [None]:
crew_df['department'].value_counts()

In [None]:
#crew_df.loc[crew_df['department'].isin(['Production', 'Writing', 'Directing'])]
crew_df.loc[crew_df['job'].isin(['Producer', 'Director', 'Writer', 'Executive Producer'])]

# comparing the crews of random movies against the IMDB profile shows good overlap
# take the first director found, and then (from the start) the next 2 different crew
# this gives perfect agreement of the top 2 crew with imdb and wikipedia

In [None]:
movie_res.external_ids()

In [None]:
# info: collection, budget, genres, imdb_id, production_companies, production_countries?, revenue, runtime, language
# get all data from info()

movie_res.info()

In [None]:
movie_res.keywords()

In [None]:
# nothing to do with current movie

#movie_res.latest()

In [None]:
# users that have the movie in their list

movie_res.lists()['total_pages']

In [None]:
# nothing to do with current movie

#movie_res.now_playing()

In [None]:
# nothing to do with current movie

#movie_res.popular()

In [None]:
# 401 Client Error

#movie_res.rating()

In [None]:
# similar movies

#movie_res.recommendations()

In [None]:
len(movie_res.release_dates()['results'])

In [None]:
len(movie_res.releases()['countries'])

In [None]:
len(movie_res.reviews()['results'])

In [None]:
len(movie_res.similar_movies()['results'])

In [None]:
# nothing to do with current movie

#movie_res.top_rated()

In [None]:
len(movie_res.translations()['translations'])

In [None]:
# nothing to do with current movie

#movie_res.upcoming()

In [None]:
# trailers and crap

len(movie_res.videos()['results'])