In [2]:
import requests
import pandas as pd

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [3]:
import os
TMDB_KEY = os.getenv("TMDB_KEY")
TMDB_TOKEN = os.getenv("TMDB_TOKEN")

In [67]:
# make first api call
base_url = "https://api.themoviedb.org/3"
auth = '?api_key={}'.format(TMDB_KEY)
test_movie_id = '/movie/550'
res = requests.get(base_url + test_movie_id + auth) # test
res.raise_for_status()
json.loads(res.text)

{'adult': False,
 'backdrop_path': '/pCUdYAaarKqY2AAUtV6xXYO8UGY.jpg',
 'belongs_to_collection': None,
 'budget': 63000000,
 'genres': [{'id': 18, 'name': 'Drama'}],
 'homepage': 'http://www.foxmovies.com/movies/fight-club',
 'id': 550,
 'imdb_id': 'tt0137523',
 'original_language': 'en',
 'original_title': 'Fight Club',
 'overview': 'A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a shocking new form of therapy. Their concept catches on, with underground "fight clubs" forming in every town, until an eccentric gets in the way and ignites an out-of-control spiral toward oblivion.',
 'popularity': 31.636,
 'poster_path': '/4GpwvwDjgwiShr1UBJIn5fk1gwT.jpg',
 'production_companies': [{'id': 508,
   'logo_path': '/7PzJdsLGlR7oW4J0J5Xcd0pHGRg.png',
   'name': 'Regency Enterprises',
   'origin_country': 'US'},
  {'id': 711,
   'logo_path': '/tEiIH5QesdheJmDAqQwvtN60727.png',
   'name': 'Fox 2000 Pictures',
   'origin_country': 'US'},
  {'id': 205

In [57]:
import json

In [58]:
fight_club = json.loads(res.text)

Grabbing keywords for Fight Club:

In [61]:
def fetch_keywords(movie_id):
    url = base_url + '/movie/{}/keywords'.format(movie_id) + auth
    res = requests.get(url)
    res.raise_for_status()
    keywords = json.loads(res.text)
    return keywords

In [62]:
fetch_keywords(fight_club['id'])

{'id': 550,
 'keywords': [{'id': 818, 'name': 'based on novel or book'},
  {'id': 825, 'name': 'support group'},
  {'id': 851, 'name': 'dual identity'},
  {'id': 1541, 'name': 'nihilism'},
  {'id': 1721, 'name': 'fight'},
  {'id': 3927, 'name': 'rage and hate'},
  {'id': 4142, 'name': 'insomnia'},
  {'id': 4565, 'name': 'dystopia'},
  {'id': 9181, 'name': 'alter ego'},
  {'id': 34117, 'name': 'cult film'},
  {'id': 156761, 'name': 'split personality'},
  {'id': 179173, 'name': 'quitting a job'},
  {'id': 212803, 'name': 'dissociative identity disorder'},
  {'id': 249899, 'name': 'graphic violence'},
  {'id': 260426, 'name': 'self destructiveness'}]}

Let's see if we can use the find endpoint and check by IMDB id. We want to see if we can use this API to fill in missing values from IMDB.

In [75]:
def search_by_id(external_id):
    url = base_url + '/find/{}'.format(external_id) + auth + '&external_source=imdb_id'
    res = requests.get(url)
    print("URL: {}".format(res.url))
    res.raise_for_status()
    res = json.loads(res.text)
    return res

In [77]:
missing_id = 'tt10427674'
fight_club_id = 'tt0137523'
search_by_id(missing_id)

URL: https://api.themoviedb.org/3/find/tt10427674?api_key=a93d153365af7da29953bf7b3cf3d498&external_source=imdb_id


{'movie_results': [],
 'person_results': [],
 'tv_results': [],
 'tv_episode_results': [],
 'tv_season_results': []}

Okay, can't use TMDB to fill in info missing from IMDB. 

That means the IMDB is the number one source of truth.

### Testing the Wikipedia API:

In [79]:
S = requests.Session()

URL = "https://en.wikipedia.org/w/api.php"

SEARCHPAGE = "Fight Club"

PARAMS = {
    "action": "query",
    "format": "json",
    "list": "search",
    "srsearch": SEARCHPAGE
}

R = S.get(url=URL, params=PARAMS)
DATA = R.json()

if DATA['query']['search'][0]['title'] == SEARCHPAGE:
    print("Your search page '" + SEARCHPAGE + "' exists on English Wikipedia")

Your search page 'Fight Club' exists on English Wikipedia


In [87]:
parse_req = '?action=parse&page={}&prop=text&formatversion=2'.format(1009041)
page = S.get(url=URL+parse_req)
page.status_code

200

In [89]:
from bs4 import BeautifulSoup
html = BeautifulSoup(page.text)
print(html.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   MediaWiki API result - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRequestId":"Xm-nlApAMNwAA-7xkNQAAACH","wgCSPNonce":!1,"wgCanonicalNamespace":"Special","wgCanonicalSpecialPageName":"ApiHelp","wgNamespaceNumber":-1,"wgPageName":"Special:ApiHelp","wgTitle":"ApiHelp","wgCurRevisionId":0,"wgRevisionId":0,"wgArticleId":0,"wgIsArticle":!1,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":[],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelev

In [80]:
DATA

{'batchcomplete': '',
 'continue': {'sroffset': 10, 'continue': '-||'},
 'query': {'searchinfo': {'totalhits': 54738},
  'search': [{'ns': 0,
    'title': 'Fight Club',
    'pageid': 1009041,
    'size': 93146,
    'wordcount': 10072,
    'snippet': '<span class="searchmatch">Fight</span> <span class="searchmatch">Club</span> is a 1999 American film directed by David Fincher and starring Brad Pitt, Edward Norton, and Helena Bonham Carter. It is based on the 1996 novel',
    'timestamp': '2020-03-16T12:18:49Z'},
   {'ns': 0,
    'title': 'Fight Club (novel)',
    'pageid': 23854860,
    'size': 43893,
    'wordcount': 5546,
    'snippet': '<span class="searchmatch">Fight</span> <span class="searchmatch">Club</span> is a 1996 novel by Chuck Palahniuk. It follows the experiences of an unnamed protagonist struggling with insomnia. Inspired by his doctor\'s',
    'timestamp': '2020-02-20T16:21:34Z'},
   {'ns': 0,
    'title': 'Fight Club 2',
    'pageid': 45657955,
    'size': 19485,
    'w