In [1]:
import json
import os
from os.path import exists
import pandas as pd
import requests
from tqdm import tqdm

### Authentication to use TMDB API

In [2]:
# Absolute path of the credentials file, resolved against the current working directory.
CONFIG_PATH = os.path.join(os.path.abspath(""), "config.json")
# NOTE(review): SCRAPED_DATA_PATH is bound to the very same config.json path;
# this looks like a leftover alias -- confirm nothing relies on it before removing.
SCRAPED_DATA_PATH = CONFIG_PATH

In [3]:
# Load the config file with API credentials.
# Fail immediately when the file is missing: AUTH_TOKEN is required further
# down to build HEADERS_IMG, so merely printing a message here would only defer
# the failure to a confusing NameError in a later cell.
if not exists(CONFIG_PATH):
    raise FileNotFoundError(f"Config not found! Expected it at: {CONFIG_PATH}")

# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding; the file is closed as soon as it is parsed.
with open(CONFIG_PATH, encoding="utf-8") as config_file:
    config = json.load(config_file)

# A key missing from the config raises KeyError naming the absent entry.
APP_NAME = config["TMDB_APPLICATION_NAME"]
AUTH_USER = config["TMDB_EMAIL"]
AUTH_TOKEN = config["TMDB_BEARER_KEY"]

# Define the headers to include the authentication token
HEADERS = {
    "accept": "application/json",
    "Authorization": f"Bearer {AUTH_TOKEN}",
}

# Downloading feature posters from TMDB 

### Create folder for images

To automate the process of fetching and storing the feature images from TMDB, a folder _"images"_ is created if it doesn't already exist.

In [4]:
# Input CSV with the scraped features, looked up in the current working directory.
SCRAPED_FEATURES_PATH = os.path.join(
    os.path.abspath(""),
    "TMDB_scraped_features.csv",
)
# Folder, next to the CSV, that receives the downloaded image files.
IMAGE_FOLDER_PATH = os.path.join(
    os.path.abspath(""),
    "images",
)

In [5]:
# Read the scraped feature table from the CSV into a DataFrame.
features_df = pd.read_csv(filepath_or_buffer=SCRAPED_FEATURES_PATH)

In [6]:
# Make sure the download target is an actual directory. isdir() is used rather
# than exists() so that a regular file that happens to be named "images" is not
# reported as a usable folder; in that case makedirs raises FileExistsError.
if os.path.isdir(IMAGE_FOLDER_PATH):
    print(f"Found folder:\n{IMAGE_FOLDER_PATH}")
else:
    # exist_ok=True tolerates the directory being created by someone else
    # between the check above and this call.
    os.makedirs(IMAGE_FOLDER_PATH, exist_ok=True)
    print(f"Created folder:\n{IMAGE_FOLDER_PATH}")

Found folder:
/home/aleksandar-lukic/school/02805_Final_project/code/images


### Iterate over the DataFrame to request the images
 
With the DataFrame loaded, the _"poster\_path"_ column contains the endpoint for each feature's poster image.

By appending these endpoints to the image base URL, the corresponding .jpg files can be retrieved and stored locally.

In [7]:
def save_feature_image(img, folder_path, feature_id):
    """Write a downloaded image to ``<folder_path>/<feature_id>_poster.jpg``.

    Args:
        img: Object exposing the raw image bytes as ``img.content``
            (the caller passes the response returned by ``requests.get``).
        folder_path: Directory that receives the file.
        feature_id: Identifier used as the file-name prefix.

    Returns:
        None. An existing file with the same name is overwritten.
    """
    file_name = f"{feature_id}_poster.jpg"
    destination = os.path.join(folder_path, file_name)
    # Binary mode: the payload is written exactly as received.
    with open(destination, "wb") as out_file:
        out_file.write(img.content)

In [8]:
# Base URL of the TMDB image host; "original" selects the full-size rendition.
BASE_URL_IMAGE = "https://image.tmdb.org/t/p/original"
# Headers sent with every image request.
# "image/jpeg" is the registered media type for JPEG files; the previous value
# "application/jpg" is not a registered type.
HEADERS_IMG = {
    "accept": "image/jpeg",
    "Authorization": f"Bearer {AUTH_TOKEN}",
}

In [9]:
# Work on the first ten rows only, to keep the download run short.
sample_df = features_df.iloc[:10]

In [10]:
# Create the tqdm progress bar
progress_bar = tqdm(sample_df.iterrows(), total=len(sample_df), desc="Saving posters")

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT_S = 30

# Ids whose poster could not be fetched; reported once the loop has finished
# so that one bad row does not abort the whole batch.
failed_ids = []

for idx, row in progress_bar:
    # Get the id and the poster endpoint of the current feature
    feature_id = row["feature_id"]
    poster_endpoint = row["poster_path"]

    # Update progress bar with current id
    progress_bar.set_postfix(current_id=feature_id)

    # An empty CSV cell is read by pandas as NaN, which cannot be
    # concatenated to the base URL -- skip such rows.
    if pd.isna(poster_endpoint):
        failed_ids.append(feature_id)
        continue

    # Send HTTPS GET request to retrieve the image
    try:
        img = requests.get(
            BASE_URL_IMAGE + poster_endpoint,
            headers=HEADERS_IMG,
            timeout=REQUEST_TIMEOUT_S,
        )
        # Turn 4xx/5xx answers into exceptions so that an error body is
        # never written to disk under a .jpg name.
        img.raise_for_status()
    except requests.RequestException:
        failed_ids.append(feature_id)
        continue

    # Only successful responses are saved to the folder
    save_feature_image(img, IMAGE_FOLDER_PATH, feature_id)

if failed_ids:
    print(f"Could not download posters for {len(failed_ids)} feature(s): {failed_ids}")

Saving posters: 100%|█████████████████████████| 10/10 [00:02<00:00,  4.60it/s, current_id=496243]
