In [1]:
# Example of Pulling Json API data. 12-19-2022
import cred_user_pwd_keys # a .py file in the same folder that has account details
# Other ways to deal with credentials.
#  - If you trust the computer and are performing development to just make sure things work.
#     * the easisest approach is to embed the API keys/passwords/usernames in this file.
#     * I do not do this, but I do create a copy of the credential information into a specific variable
#     * you can just replace that definition with your credentials
#     * if you store your passwords in plain text in this file DO NOT unpload to git hub or paste the code anywhere on the internet
# - The docker approache: Note the user credentials can be stored as enviroment variables (often used in docker setups for better security)
#     * https://www.twilio.com/blog/environment-variables-python
#     * https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/
#     * https://blog.pilosus.org/posts/2019/06/07/application-configs-files-or-environment-variables-actually-both/
# - The final way and the way I'm handling this is creating a "settings/config/passwords" text file that stores the information in plain text
#     * THIS settings/passwords file should NEVER be uploaded to Git
#     * add this file to your .gitignore file or similar 
#DO NOT UPLOAD this file if you have hardcoded API keys/passwords
#DO NOT UPLOAD THE cred_user_pwd_keys.py file to GIT or any other file sharing site
import os
import shutil # for copying .jpgs or other raw data from the web request
# Requests is one of the easier to understand modules to download a website or many web related APIs
import requests


#I don't put much into pandas, but you can load info into pandas for all your data manipulation needs
import pandas as pd
# Although not scrictly needed most API calls will return a json format. 
# Python's dictionary and Json are very simlar and easy to swtich between.
import json
import re # used for pattern matching and removing bad characters


#Dealing with dates/times
from datetime import datetime
from dateutil.parser import parse #Let the parser deal with the formatting
import time 

#Example of how to pull enviroment variables
PythonLocation = os.environ.get('PATH')

if PythonLocation:
    print('All your application locations: ')
    print(PythonLocation)
else:
    print("you don't have an enviroment PATH")

All your application locations: 
/home/lex0013/bin/pythonvenv/ke/bin:/home/lex0013/bin:/home/lex0013/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin


# EASY KEY IS PART OF ADDRESS
## The Movie DB V3
* Always refer back to the API information.
* More information can be found here

https://developers.themoviedb.org/3/getting-started/introduction

In [2]:
#The Movie DB V3 Example pulled right from the API information
movieID = 550
#Manual Test Example
akey = cred_user_pwd_keys.TheMovieDB_API_Key_v3_auth

MainMovieInfoUrl = f'https://api.themoviedb.org/3/movie/{movieID}?api_key={akey}'


testWebGet = requests.get(MainMovieInfoUrl)

ParsedData = json.loads(testWebGet.text)

#View the result in a more human readable format
print(json.dumps(ParsedData, indent=4))

{
    "adult": false,
    "backdrop_path": "/hZkgoQYus5vegHoetLkCJzb17zJ.jpg",
    "belongs_to_collection": null,
    "budget": 63000000,
    "genres": [
        {
            "id": 18,
            "name": "Drama"
        },
        {
            "id": 53,
            "name": "Thriller"
        },
        {
            "id": 35,
            "name": "Comedy"
        }
    ],
    "homepage": "http://www.foxmovies.com/movies/fight-club",
    "id": 550,
    "imdb_id": "tt0137523",
    "original_language": "en",
    "original_title": "Fight Club",
    "overview": "A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a shocking new form of therapy. Their concept catches on, with underground \"fight clubs\" forming in every town, until an eccentric gets in the way and ignites an out-of-control spiral toward oblivion.",
    "popularity": 89.557,
    "poster_path": "/pB8BM7pdSp6B6Ih7QZ4DrQ3PmJK.jpg",
    "production_companies": [
        {
            "

In [3]:
print("Title: ", ParsedData['title'], "| SecondProdCompany: " + ParsedData['production_countries'][1]['name'])
print("Looping through all Genre's")
for genre in ParsedData['genres']:
    print("   sub-json data:", genre)
    print("   -  ", genre["name"])

Title:  Fight Club | SecondProdCompany: United States of America
Looping through all Genre's
   sub-json data: {'id': 18, 'name': 'Drama'}
   -   Drama
   sub-json data: {'id': 53, 'name': 'Thriller'}
   -   Thriller
   sub-json data: {'id': 35, 'name': 'Comedy'}
   -   Comedy


In [4]:
#List all production Companies:
for comp in ParsedData['production_companies']:
    print(comp['name'], ' From: ', comp['origin_country'])

Regency Enterprises  From:  US
Fox 2000 Pictures  From:  US
The Linson Company  From:  US
Taurus Film  From:  DE
Atman Entertainment  From:  
Knickerbocker Films  From:  US


In [5]:
#Export Results to a .json file as an example of one way to extract information 
with open(f"{ParsedData['imdb_id']}.json", 'w') as fp:
    json.dump(ParsedData, fp)
    print("Saved File to: " + os.getcwd() + f"/{ParsedData['imdb_id']}.json")

Saved File to: /ssd/repos/api_pulling_examples_python_go/Python/01_The_Movie_DB_Robust_Example/tt0137523.json


In [6]:
# Using the Find with a IMDb ID---


movie_IMDb_id = 'tt4154796'
externalsource= 'imdb_id'


url = f'https://api.themoviedb.org/3/find/{movie_IMDb_id}?api_key={akey}&language=en-US&external_source={externalsource}'

webdata = requests.get(url)
ParsedData = json.loads(webdata.text)

#View the result in a more human readable format
print(json.dumps(ParsedData, indent=4))

GetTheID = ParsedData["movie_results"][0]['id']

print("TheFirstMovieFoundID:", GetTheID)

{
    "movie_results": [
        {
            "adult": false,
            "backdrop_path": "/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg",
            "id": 299534,
            "title": "Avengers: Endgame",
            "original_language": "en",
            "original_title": "Avengers: Endgame",
            "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.",
            "poster_path": "/or06FN3Dka5tukK1e9sl16pB3iy.jpg",
            "media_type": "movie",
            "genre_ids": [
                12,
                878,
                28
            ],
            "popularity": 123.326,
            "release_date": "2019-04-24",
            "video": false,
            "vote_average": 8.277,
            "vote_c

In [7]:
# Get one reviews

movie_id = '299534'

pageLoop = 1

url = f'https://api.themoviedb.org/3/movie/{movie_id}/reviews?api_key={akey}&language=en-US&page={pageLoop}'

webdata = requests.get(url)
ParsedData = json.loads(webdata.text)

#View the result in a more human readable format
print(json.dumps(ParsedData, indent=4))



{
    "id": 299534,
    "page": 1,
    "results": [
        {
            "author": "garethmb",
            "author_details": {
                "name": "",
                "username": "garethmb",
                "avatar_path": "/https://www.gravatar.com/avatar/3593437cbd05cebe0a4ee753965a8ad1.jpg",
                "rating": null
            },
            "content": "The epic Marvel Saga that started over ten years ago has been building up to the inevitable clash with the powerful tyrant Thanos (Josh Brolin).  Last year\u2019s \u201cAvengers: Infinity War\u201d set the stage for the highly-anticipated conclusion; \u201cAvengers: End Game\u201d and at long last it has arrived.\r\n\r\nPicking up shortly after the events of the last film, the Avengers must deal with the aftermath of what Thanos has done. The team is naturally divided between wanting revenge, wanting to set things right, and just wanting to take what they have and go on. \r\n\r\nAs time passes and they struggle to accept t

In [8]:
print("Number of Separate Web Calls (pages) Needed: " + str(ParsedData['total_pages']))
print(" - Current (page): " + str(ParsedData['page']))
print(" - Number of descrete reivews on this (page): " + str(len(ParsedData['results'])))

print("")

CreationDTobject = parse(ParsedData['results'][0]['created_at'])
print("The Date time is a: " + str(type(CreationDTobject)))

print("The First Review was created on: " + str(CreationDTobject.date()))
print("    At this time: " + str(CreationDTobject.time()))
print("    In the TimeZone: " + str(CreationDTobject.tzinfo))
print("    or human readable: " + CreationDTobject.strftime("%A %m/%d/%Y, %I:%M %p %Z"))
#https://www.programiz.com/python-programming/datetime/strftime

Number of Separate Web Calls (pages) Needed: 3
 - Current (page): 1
 - Number of descrete reivews on this (page): 20

The Date time is a: <class 'datetime.datetime'>
The First Review was created on: 2019-04-23
    At this time: 22:40:39.757000
    In the TimeZone: tzutc()
    or human readable: Tuesday 04/23/2019, 10:40 PM UTC


## Queries with paramaters


In [9]:
#Example pulled from the API's examples
pageLoop = 1

#We want all movies that are less than PG-13 rating us rating.
#To get more realistic results only return movies with more than 75 votes
#Sort by the vote_average descending. This will give us A LOT of movies it would be worth adding more filters
certification_country="US"
certificationMaxScore="PG-13"
GTNumbVotes=75
sort_by="vote_average.desc"


url = f'https://api.themoviedb.org/3/discover/movie/?api_key={akey}&language=en-US&page={pageLoop}&certification_country={certification_country}&ertification.lte={certificationMaxScore}&vote_count.gte={GTNumbVotes}&sort_by={sort_by}'

webdata = requests.get(url)
ParsedData = json.loads(webdata.text)

#View the result in a more human readable format
print(json.dumps(ParsedData, indent=4))



{
    "page": 1,
    "results": [
        {
            "adult": false,
            "backdrop_path": "/4IMn2EqCBb4n4r6FODOP7fJQIdi.jpg",
            "genre_ids": [
                10402
            ],
            "id": 1022102,
            "original_language": "ko",
            "original_title": "BTS: PERMISSION TO DANCE \uc628 \uc2a4\ud14c\uc774\uc9c0 \u2013 LA",
            "overview": "Purple colors the city of Los Angeles, as BTS brings their \"Permission to Dance\" concert to SoFi Stadium for the first time in two years. In a stadium radiating anticipation and cheer, splendid performances from \"On\" to \"Permission to Dance\" glorify the stage that now comes to life on screen. Be united once again by the power of music.",
            "popularity": 29.047,
            "poster_path": "/9pNGRpOOQpZtqMLncEcQDv86OkW.jpg",
            "release_date": "2022-09-08",
            "title": "BTS: Permission to Dance on Stage - LA",
            "video": false,
            "vote_average": 9.2,

In [10]:
#Take the example of the query above and pull the 3 & 4th page of results only getting the movie ID so we can extract the reviews of those movies

ListOfMovieIDs=[]

#Grab all the movie Ids on the 3 & 4th page
#Example pulled from the API's examples
for pagenum in range (2, 4):
    pageLoop = pagenum

    certification_country="US"
    certificationMaxScore="PG-13"
    GTNumbVotes=75
    sort_by="vote_average.desc"


    url = f'https://api.themoviedb.org/3/discover/movie/?api_key={akey}&language=en-US&page={pageLoop}&certification_country={certification_country}&ertification.lte={certificationMaxScore}&vote_count.gte={GTNumbVotes}&sort_by={sort_by}'

    webdata = requests.get(url)
    ParsedData = json.loads(webdata.text)
    #Loop through each movie
    for Movie in ParsedData['results']:
        ListOfMovieIDs.append(Movie['id'])
        #Note These print statments might fail as there are non US characters in a lot of the movie names
        print("movie: ", Movie['original_title'], " Vote AVG: ", str(Movie['vote_average']), " Vote Count: ", str(Movie['vote_count']))




movie:  Scooby-Doo! Adventures: The Mystery Map  Vote AVG:  8.6  Vote Count:  78
movie:  BLACKPINK: Arena Tour 2018 "Special Final in Kyocera Dome Osaka"  Vote AVG:  8.6  Vote Count:  134
movie:  Lucy Shimmers And The Prince Of Peace  Vote AVG:  8.6  Vote Count:  80
movie:  Huevitos Congelados  Vote AVG:  8.6  Vote Count:  89
movie:  Harry Styles: Behind the Album  Vote AVG:  8.6  Vote Count:  90
movie:  Chivas: La película  Vote AVG:  8.6  Vote Count:  102
movie:  Огонь  Vote AVG:  8.6  Vote Count:  133
movie:  Taylor Swift: The 1989 World Tour - Live  Vote AVG:  8.6  Vote Count:  111
movie:  Schindler's List  Vote AVG:  8.6  Vote Count:  13548
movie:  DC Showcase: Death  Vote AVG:  8.6  Vote Count:  82
movie:  दिलवाले दुल्हनिया ले जायेंगे  Vote AVG:  8.6  Vote Count:  3985
movie:  Michael Jackson: Live in Bucharest - The Dangerous Tour  Vote AVG:  8.6  Vote Count:  86
movie:  Queen: Live at Wembley Stadium  Vote AVG:  8.6  Vote Count:  118
movie:  RBD: Ser o Parecer - The Global Virt

### an example of taking the input movie IDs from above's list and an approach to save this to a file

In [11]:
#Setup some variables
MoviewReviewsList = []
output = pd.DataFrame()
movieCount = 0


for movieid in ListOfMovieIDs:
    movieCount = movieCount + 1
    
    #Do something like this if the run failed and you need to start somewhere in the middle. This will waste some time looping through movies, but it works
    #if movieCount <= 2600:
    #    continue
   
    #Used to occassionally save the output. If there is a crash we can just start about where it failed
    pageLoop = 1
    reviews = []
    
    # We have to call the API once to see how many pages of reviews there are
    url = f'https://api.themoviedb.org/3/movie/{movieid}/reviews?api_key={akey}&page={pageLoop}'
    webdata = requests.get(url)
    ParsedData = json.loads(webdata.text)

    # If there are no results or bad movie go to the next movie
    if ParsedData.get('results') == None:
        #Did not get a response for the movie
        continue
    #Remember the structure above the results is a list of dictionary reviews, we append this to the temporary python list reviews
    reviews.append(ParsedData['results'])
    
    # Now that we have the initial pull if there is more than 1 page of reviews loop through all pages. Note for Large reviews in the 1000s you might want
    # To cut it off early like check if total pages > 100 and stop it there.
    if ParsedData['total_pages'] > 1:
        totalpages = ParsedData['total_pages']
        print("MoreThan1page ", movieid)
        for i in range(2, totalpages+1):
            pageLoop = i
            webdata = requests.get(url)
            ParsedData = json.loads(webdata.text)
            reviews.append(ParsedData['results'])
    MoviewReviewsList.append({"id":movieid, "Reviews": reviews})
    
    #Every 25 movies print off the status
    if movieCount % 25 == 0:
        print(movieid, ' On Movie: ', str(movieCount))
    
    #Every 30 movies Save a backup of the currently completed pulls.
    #Very useful to not keep pulling the same info, but there is still a chance you will get duplicate records that you will need to clean.
    if movieCount % 30 == 0:
        with open(f"5000_TMDB_Reviews{movieCount}.json", 'w') as fp:
            json.dump(MoviewReviewsList, fp)

664280  On Movie:  25


In [12]:
#Assuming the above code completed without error... Save a final copy of the data

with open(f"001_ALL_TMDB_Reviews.json", 'w') as fp:
    json.dump(MoviewReviewsList, fp)

## Another Example of Paramaters and saving the data as it goes

In [13]:
#Only english movies, with original language as english
#Sort by Newest to Oldest primary release date
#Only get movies with over 200 votes
#DOES NOT USE THE POPULARITY NUMBERS
#Because of this is the discover API we get slightly less info on the movies.
lang = 'en-US'
sort = 'primary_release_date.desc'
votecnt = '200'
origlang = 'en'

#Lets store all the info in a dataframe.
output = pd.DataFrame()

movieCount = 0
for page in range(1, 50):
    movieCount = movieCount + 1

    
    #This API is a generic search API to returna list of movies that meet the criteria
    movieurl = f'https://api.themoviedb.org/3/discover/movie?api_key={cred_user_pwd_keys.TheMovieDB_API_Key_v3_auth}&language={lang}&sort_by={sort}&include_adult=false&include_video=false&page={page}&vote_count.gte={votecnt}&with_original_language={origlang}'

    #Go to the page and load it into json
    moviepage = requests.get(movieurl)
    ParsedData = json.loads(moviepage.text)
    
    #Just incase we don't get results wait and re-try
    if ParsedData.get('results') == None:
        #Did not get a response Wait 120 seconds and continue to next page
        time.sleep(120)
        continue
    for movie in ParsedData['results']:
        #Keep dumping retulsts list of movies with some info into the output dataframe
        #TODO look into a better way to do this...
        output = output.append(movie, ignore_index=True)
    
    #Print status to screen every 10 pages
    if movieCount % 10 == 0:
        print(ParsedData['results'][0]['original_title'], ' On Page: ', str(movieCount))
    #Save a running file every 20 pages for safey incase the code crashes
    if movieCount % 20 == 0:
        output.to_csv(f"RecentPopularMovies{movieCount}.csv")
        output.to_pickle(f"RecentPopularMovies{movieCount}.pkl")     
    
    if movieCount % 25 == 0:
        time.sleep(60) #Wait 1 minutes every 25 pages to not overload API this seems like overkill could probably increase to 100
    
    
output.to_csv(f"CompleteRecentPopularMovies.csv")
output.to_pickle(f"CompleteRecentPopularMovies.pkl")

  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie,

Being the Ricardos  On Page:  10


  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie,

The Fallout  On Page:  20


  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie,

Greyhound  On Page:  30


  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie,

Rambo: Last Blood  On Page:  40


  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie, ignore_index=True)
  output = output.append(movie,

## Data outside API like Images or additional links


In [14]:
#Download an Image from the API many APIs will have arbitrary ways to pull extra information like images or links to additional information.
# If you are lucky this will be defined in the API documentation, or in an API call itself as is the cass with the movie db
#https://developers.themoviedb.org/3/getting-started/images
    
ManuallyDefinedUrl = f'https://api.themoviedb.org/3/configuration?api_key={cred_user_pwd_keys.TheMovieDB_API_Key_v3_auth}'

testWebGet = requests.get(ManuallyDefinedUrl)

ParsedConfig = json.loads(testWebGet.text)

#View the result in a more human readable format
print(json.dumps(ParsedConfig, indent=4))

{
    "images": {
        "base_url": "http://image.tmdb.org/t/p/",
        "secure_base_url": "https://image.tmdb.org/t/p/",
        "backdrop_sizes": [
            "w300",
            "w780",
            "w1280",
            "original"
        ],
        "logo_sizes": [
            "w45",
            "w92",
            "w154",
            "w185",
            "w300",
            "w500",
            "original"
        ],
        "poster_sizes": [
            "w92",
            "w154",
            "w185",
            "w342",
            "w500",
            "w780",
            "original"
        ],
        "profile_sizes": [
            "w45",
            "w185",
            "h632",
            "original"
        ],
        "still_sizes": [
            "w92",
            "w185",
            "w300",
            "original"
        ]
    },
    "change_keys": [
        "adult",
        "air_date",
        "also_known_as",
        "alternative_titles",
        "biography",
        "birthday"

In [15]:
# Get one reviews

movie_id = '299534'

pageLoop = 1

url = f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={akey}&language=en-US'

webdata = requests.get(url)
ParsedData = json.loads(webdata.text)





In [16]:
print(ParsedData)

{'adult': False, 'backdrop_path': '/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg', 'belongs_to_collection': {'id': 86311, 'name': 'The Avengers Collection', 'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg', 'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'}, 'budget': 356000000, 'genres': [{'id': 12, 'name': 'Adventure'}, {'id': 878, 'name': 'Science Fiction'}, {'id': 28, 'name': 'Action'}], 'homepage': 'https://www.marvel.com/movies/avengers-endgame', 'id': 299534, 'imdb_id': 'tt4154796', 'original_language': 'en', 'original_title': 'Avengers: Endgame', 'overview': "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", 'popularity': 123.326, 'poster_path': '/or06FN3Dka5tukK1e9sl16pB3iy.jpg', 'production_companies': [{'id': 

In [17]:
#View the result in a more human readable format
for key, value in ParsedData.items():
    print('Key:', key, ' | Value: ', value)
    print("--------")

Key: adult  | Value:  False
--------
Key: backdrop_path  | Value:  /7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg
--------
Key: belongs_to_collection  | Value:  {'id': 86311, 'name': 'The Avengers Collection', 'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg', 'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'}
--------
Key: budget  | Value:  356000000
--------
Key: genres  | Value:  [{'id': 12, 'name': 'Adventure'}, {'id': 878, 'name': 'Science Fiction'}, {'id': 28, 'name': 'Action'}]
--------
Key: homepage  | Value:  https://www.marvel.com/movies/avengers-endgame
--------
Key: id  | Value:  299534
--------
Key: imdb_id  | Value:  tt4154796
--------
Key: original_language  | Value:  en
--------
Key: original_title  | Value:  Avengers: Endgame
--------
Key: overview  | Value:  After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' ac

In [18]:
# Download Image with info from the API and info in the results
baseURL = 'http://image.tmdb.org/t/p/'
poster_size = 'w342' #  Other Options "w92", "w154", "w185", "w342", "w500"

FileName = ParsedData['poster_path']

Savepath = "MoviePosterFromWeb.jpg"


poster_full_path_url = baseURL + poster_size + FileName

print("Full Url for Image: ", poster_full_path_url)

r = requests.get(poster_full_path_url, stream=True)
if r.status_code == 200: # in web speak 200 means a good response that we can read
    with open(Savepath, 'wb') as f:
        r.raw.decode_content = True
        shutil.copyfileobj(r.raw, f)     
        print('Image sucessfully Downloaded: ',Savepath) 

Full Url for Image:  http://image.tmdb.org/t/p/w342/or06FN3Dka5tukK1e9sl16pB3iy.jpg
Image sucessfully Downloaded:  MoviePosterFromWeb.jpg
