# Using the TMDB API

In [121]:
import requests
import json
import gzip
import pandas as pd
import urllib.request

## Movie IDs DataFrame

We need to be able to look up the TMDB ID for each movie, so first of all we download, unzip and store this data in a data frame.

In [191]:
# Get the ID file from tmdb
tmdb_id_file = urllib.request.URLopener()
tmdb_id_file.retrieve("http://files.tmdb.org/p/exports/movie_ids_10_09_2021.json.gz", "./data/tmdb_id_file.gz")

# Unzip it and store the string in a list of strings
with gzip.GzipFile("./data/tmdb_id_file.gz", 'r') as fin:   
    json_bytes = fin.read()                      
json_list_of_str = json_bytes.decode().split("\n")

# Turn the list of strings into a list of dictionaries
dict_list = []
for dict_str in json_list_of_str[:-1]:
    real_dict = json.loads(dict_str)
    dict_list.append(real_dict)
    
# Convert it to a dataframe
movie_id_df = pd.DataFrame(dict_list)
movie_id_df.head()

Unnamed: 0,adult,id,original_title,popularity,video
0,False,3924,Blondie,1.185,False
1,False,6124,Der Mann ohne Namen,0.6,False
2,False,8773,L'amour à vingt ans,2.733,False
3,False,25449,New World Disorder 9: Never Enough,1.545,False
4,False,31975,Sesame Street: Elmo Loves You!,0.6,True


## Getting Info for individual movies

In [192]:
# load the API credentials
key_yml = json.load(open("./tmdb_credentials.yml"))
tmdb_api_key = key_yml["api_key"]

In [45]:
# Here's an example URL for one specific movie - note the ID = 55
example_url = "https://api.themoviedb.org/3/movie/55?api_key="

In [183]:
req = requests.get(example_url + tmdb_api_key)

# Here's what the output looks like
example_json = req.json()
example_json

{'adult': False,
 'backdrop_path': '/3YzVtvNERQOIJXnrcWv3xhj5EMT.jpg',
 'belongs_to_collection': None,
 'budget': 2000000,
 'genres': [{'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}],
 'homepage': '',
 'id': 55,
 'imdb_id': 'tt0245712',
 'original_language': 'es',
 'original_title': 'Amores perros',
 'overview': 'A fatalistic car crash in Mexico City sets off a chain of events in the lives of three persons: a young man aching to run off with his sister-in-law, a supermodel, and a homeless man. Their lives are catapulted into unforeseen circumstances instigated by the seemingly inconsequential destiny of a dog.',
 'popularity': 22.476,
 'poster_path': '/vV4vlD4ool5JSsS1rB82qjCF6z8.jpg',
 'production_companies': [{'id': 5084,
   'logo_path': None,
   'name': 'Altavista Films',
   'origin_country': 'MX'},
  {'id': 110056,
   'logo_path': None,
   'name': 'Zeta Film (MX)',
   'origin_country': ''},
  {'id': 1632,
   'logo_path': '/cisLn1YAUuptXVBa0xjq7ST9cH0.png',
   'name': '

In [184]:
# We can get the description with 'overview'
example_json["overview"]

'A fatalistic car crash in Mexico City sets off a chain of events in the lives of three persons: a young man aching to run off with his sister-in-law, a supermodel, and a homeless man. Their lives are catapulted into unforeseen circumstances instigated by the seemingly inconsequential destiny of a dog.'

## For Movies currently running

In [194]:
cineman_df = pd.read_csv(f"./data/2021-09-24_showtimes_zurich.csv", index_col=0)
cineman_df.head()

Unnamed: 0,movie,genre,age_limit,language,movie_link,showtime,date,cinema,place,rating,cinema_place,latitude,longitude
0,Réveil sur Mars,Documentary,16Y.,O/g,https://www.cineman.ch/en/movie/2020/WakeUpOnM...,11:40,2021-09-24,Houdini,Zürich,not available,Houdini Zürich,47.374607,8.520307
1,The Father,Drama,14/12Y.,E/gf,https://www.cineman.ch/en/movie/2020/TheFather/,11:40,2021-09-24,Houdini,Zürich,4.1,Houdini Zürich,47.374607,8.520307
2,Billie,"Documentary, Music",16/10Y.,E/g,https://www.cineman.ch/en/movie/2019/Billie/,11:50,2021-09-24,Houdini,Zürich,4.8,Houdini Zürich,47.374607,8.520307
3,La Fine Fleur,"Comedy, Drama",14/8Y.,G,https://www.cineman.ch/en/movie/2020/LaFineFleur/,12:00,2021-09-24,Houdini,Zürich,4.5,Houdini Zürich,47.374607,8.520307
4,Tiger und Büffel,Documentary,12Y.,O/g,https://www.cineman.ch/en/movie/2021/TigerUndB...,12:00,2021-09-24,Houdini,Zürich,5.0,Houdini Zürich,47.374607,8.520307


In [244]:
cineman_tmdb_df = pd.merge(left=cineman_df, right=movie_id_df[["id", "original_title"]], left_on="movie", right_on="original_title", how="left")
cineman_tmdb_df.head()

Unnamed: 0,movie,genre,age_limit,language,movie_link,showtime,date,cinema,place,rating,cinema_place,latitude,longitude,id,original_title
0,Réveil sur Mars,Documentary,16Y.,O/g,https://www.cineman.ch/en/movie/2020/WakeUpOnM...,11:40,2021-09-24,Houdini,Zürich,not available,Houdini Zürich,47.374607,8.520307,,
1,The Father,Drama,14/12Y.,E/gf,https://www.cineman.ch/en/movie/2020/TheFather/,11:40,2021-09-24,Houdini,Zürich,4.1,Houdini Zürich,47.374607,8.520307,391409.0,The Father
2,The Father,Drama,14/12Y.,E/gf,https://www.cineman.ch/en/movie/2020/TheFather/,11:40,2021-09-24,Houdini,Zürich,4.1,Houdini Zürich,47.374607,8.520307,600354.0,The Father
3,The Father,Drama,14/12Y.,E/gf,https://www.cineman.ch/en/movie/2020/TheFather/,11:40,2021-09-24,Houdini,Zürich,4.1,Houdini Zürich,47.374607,8.520307,693910.0,The Father
4,The Father,Drama,14/12Y.,E/gf,https://www.cineman.ch/en/movie/2020/TheFather/,11:40,2021-09-24,Houdini,Zürich,4.1,Houdini Zürich,47.374607,8.520307,760777.0,The Father


It fails to match some of the movie titles.

In [226]:
missing_ids = cineman_tmdb_df[cineman_tmdb_df.original_title.isnull()]
missing_ids.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61 entries, 0 to 301
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   movie           61 non-null     object 
 1   genre           38 non-null     object 
 2   age_limit       61 non-null     object 
 3   language        61 non-null     object 
 4   movie_link      61 non-null     object 
 5   showtime        61 non-null     object 
 6   date            61 non-null     object 
 7   cinema          61 non-null     object 
 8   place           61 non-null     object 
 9   rating          45 non-null     object 
 10  cinema_place    61 non-null     object 
 11  latitude        61 non-null     float64
 12  longitude       61 non-null     float64
 13  id              0 non-null      float64
 14  original_title  0 non-null      object 
dtypes: float64(3), object(12)
memory usage: 7.6+ KB


In [237]:
current_ids = cineman_tmdb_df["id"].dropna().astype("int")
overviews = []

for movie_id in current_ids:
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key="
    req = requests.get(url + tmdb_api_key)
    movie_json = req.json()
    overviews.append(movie_json["overview"])

In [239]:
overviews_df = pd.DataFrame({"id":current_ids, "overview":overviews})
overviews_df.head()

Unnamed: 0,id,overview
1,391409,An animated story from Sticky Monster Lab that...
2,600354,A man refuses all assistance from his daughter...
3,693910,Produced through a grant at the American Film ...
4,760777,Follows the struggle of middle-class Hong Kong...
5,798341,"During the Derg Period in Ethiopia, artist Ala..."


In [245]:
cineman_tmdb_df = pd.merge(left=cineman_tmdb_df, right=overviews_df, on="id", how="left")

In [247]:
cineman_tmdb_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2239 entries, 0 to 2238
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   movie           2239 non-null   object 
 1   genre           2180 non-null   object 
 2   age_limit       2239 non-null   object 
 3   language        2239 non-null   object 
 4   movie_link      2239 non-null   object 
 5   showtime        2239 non-null   object 
 6   date            2239 non-null   object 
 7   cinema          2239 non-null   object 
 8   place           2239 non-null   object 
 9   rating          2191 non-null   object 
 10  cinema_place    2239 non-null   object 
 11  latitude        2239 non-null   float64
 12  longitude       2239 non-null   float64
 13  id              2178 non-null   float64
 14  original_title  2178 non-null   object 
 15  overview        2178 non-null   object 
dtypes: float64(3), object(13)
memory usage: 297.4+ KB
