In [2]:
import requests
import json
import pandas as pd
import numpy as np

# Querying tmdb for item metadata

In [3]:
# This database has been built from guidebox's API.
# index_col=0 makes the first CSV column the DataFrame index.
db = pd.read_csv('../data/init_db-fix1.csv', index_col=0)

In [4]:
# Our list of items: the distinct values of the 'themoviedb' column
# (np.unique returns them sorted). These are the ids queried against TMDB below.
# NOTE(review): if the column contains missing values, NaN will also end up
# in this list — confirm against the source CSV.
item_tmbd_id = list(np.unique(db['themoviedb']))
len(item_tmbd_id)

7402

## Using tmdbsimple

In [5]:
def rapidAPI_key(filename):
    '''
    Read an API key from a text file.

    :param filename: Path of a file whose entire content is the key (nothing else).
    :return: The file content as a string, with leading and trailing whitespace removed.
    '''
    with open(filename) as key_file:
        # strip() drops the trailing newline most editors append
        return key_file.read().strip()

In [6]:
# I use the package tmdbsimple to do my querying

#! pip install tmdbsimple
import tmdbsimple as tmdb

# Read the key from a local file (so it is not hardcoded in the notebook)
# and register it with tmdbsimple through its module-level API_KEY attribute.
tmdb_key = rapidAPI_key('../data/tmdb_key.txt')
tmdb.API_KEY = tmdb_key

In [7]:
%%time
# Query TMDB once per id in item_tmbd_id and collect one metadata tuple per show.
# Lookups are best-effort: an id that fails is skipped, but recorded in failed_ids
# so the gap between requested and fetched shows can be inspected or retried.

db_tuples = []
failed_ids = []  # (id, error description) for every lookup that was skipped

for show in item_tmbd_id:
    try:
        tv_show = tmdb.TV(show)
        # info() fetches the show's details; the fields are then read off tv_show
        response = tv_show.info()
        db_tuples.append((tv_show.id, tv_show.name,
                          tv_show.genres, tv_show.first_air_date, tv_show.vote_average,
                          tv_show.popularity, tv_show.poster_path, tv_show.origin_country))
        # Other possible data: vote_count, type (scripted or not), languages, 
    except Exception as err:
        # Catch Exception rather than using a bare except: KeyboardInterrupt and
        # SystemExit must still be able to stop this long-running loop.
        failed_ids.append((show, repr(err)))

print(f'{len(db_tuples)} shows fetched, {len(failed_ids)} ids skipped')

CPU times: user 2min 23s, sys: 9.2 s, total: 2min 32s
Wall time: 28min 51s


In [8]:
# Assemble the collected tuples into a DataFrame; the column labels follow
# the order in which the fields were placed into each tuple.
tv_db = pd.DataFrame(
    db_tuples,
    columns=[
        'themoviedb', 'title', 'genres', 'first_aired',
        'vote_average', 'popularity', 'poster_path', 'origin_country',
    ],
)

In [10]:
# (rows, columns): one row per show whose lookup succeeded. Compare the row count
# with len(item_tmbd_id) to see how many ids were skipped by the query loop.
tv_db.shape

(7290, 8)

In [11]:
# Persist the metadata table to disk in CSV format.
# NOTE(review): the target path has no '.csv' extension, and with default
# arguments to_csv also writes the DataFrame index as the first column —
# confirm that downstream readers expect both.
tv_db.to_csv('../data/tmdb_metadata')

## Appendix - how to use tmdbsimple

In [37]:
# How to query Movies
# Note: the same numeric id passed to tmdb.TV in the next example names a
# different title, so an id must be queried with the matching class.
movie = tmdb.Movies(63247)
response = movie.info()
print(movie.title)

My Teacher, Mr. Kim


In [12]:
# How to query TV shows
# Once info() has been called, fields such as the show's name are read
# as attributes of the tv_show object.
tv_show = tmdb.TV(63247)
response = tv_show.info()
print(tv_show.name)

Westworld


In [13]:
# All the possible info for the show:
# info() returns the response as a dict, displayed here as the cell output.
tv_show.info()

{'backdrop_path': '/yGNnjoIGOdQy3douq60tULY8teK.jpg',
 'created_by': [{'id': 527,
   'credit_id': '55a6280992514109ab000f13',
   'name': 'Jonathan Nolan',
   'gender': 2,
   'profile_path': '/rYBQ8M3hDDY0eThVIvWHmrf4i0Y.jpg'},
  {'id': 1497967,
   'credit_id': '55d2f13ac3a368463c000b66',
   'name': 'Lisa Joy',
   'gender': 1,
   'profile_path': '/gaHPV9066HYWytaOa0V04GJBOH.jpg'}],
 'episode_run_time': [60],
 'first_air_date': '2016-10-02',
 'genres': [{'id': 37, 'name': 'Western'},
  {'id': 878, 'name': 'Science Fiction'}],
 'homepage': 'http://www.hbo.com/westworld',
 'id': 63247,
 'in_production': True,
 'languages': ['en'],
 'last_air_date': '2020-03-29',
 'last_episode_to_air': {'air_date': '2020-03-29',
  'episode_number': 3,
  'id': 2183039,
  'name': 'The Absence of Field',
  'overview': "Some do not like what they see in the mirror but shouldn't blame the mirror.",
  'production_code': '',
  'season_number': 3,
  'show_id': 63247,
  'still_path': '/yqupzLZbj2CW9rd2iFAlFrPt41a.j

In [48]:
# The subset of fields kept for each show (same fields, same order,
# as the tuples collected in the query loop)
print(
    tv_show.id,
    tv_show.name,
    tv_show.genres,
    tv_show.first_air_date,
    tv_show.vote_average,
    tv_show.popularity,
    tv_show.poster_path,
    tv_show.origin_country,
)

63247 Westworld [{'id': 37, 'name': 'Western'}, {'id': 878, 'name': 'Science Fiction'}] 2016-10-02 8.1 131.75 /y55oBgf6bVMI7sFNXwJDrSIxPQt.jpg ['US']
