In [16]:
import pandas as pd
import requests
import json
import os
from dotenv import load_dotenv


# Load environment variables from .env
load_dotenv()

# Get the API key (create a .env file and add an API_KEY variable to it)
API_KEY = os.getenv('API_KEY')
if not API_KEY:
    # Without a key every request is rejected, so fail early with a clear
    # message instead of sending the literal string "None" to the API.
    raise RuntimeError("API_KEY is not set. Add it to the environment or to a .env file.")

# Endpoint templates do not change between iterations, so define them once.
MOVIE_ENDPOINT = "https://api.themoviedb.org/3/trending/person/day?api_key={}&language=en-US&page={}"
PERSON_ENDPOINT = "https://api.themoviedb.org/3/person/{}?api_key={}&language=en-US"
MOVIE_DETAILS_ENDPOINT = "https://api.themoviedb.org/3/movie/{}?api_key={}&language=en-US"

# Seconds to wait for a response; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10


def _fetch_json(url):
    """Return the decoded JSON body of a GET request to *url*.

    Returns None when the server answers with any status other than 200.
    Network errors and timeouts raised by ``requests`` are not caught.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        return None
    return response.json()


page = 1

print("Fetshing start.")

# Walk the trending-people listing page by page. For every page, collect the
# actors, the movies they are known for and the actor/movie links, then write
# the three collections to ../data/<page>/ as JSON files. The loop stops on
# the first failed or empty page, or once the reported last page is processed.
while True:
    print(f"************* PAGE {page} **************")
    # Call the API to fetch data
    payload = _fetch_json(MOVIE_ENDPOINT.format(API_KEY, page))

    actors_list = []
    movies_list = []
    acted_list = []

    if payload is None:
        print("Error fetching data from TMDb API")
        # A failed listing request (e.g. past the last page) ends the crawl
        # instead of retrying ever-higher page numbers forever.
        break

    actors = payload.get("results")
    if not actors:
        print("Empty data. Skipping sending to DB.")
        # No more results: nothing further to fetch.
        break

    for person in actors:
        # Only people with a non-empty "known_for" list are exported
        if not person.get("known_for"):
            print("Empty or missing 'actor name' or 'known_for' field. Skipping sending to DB.")
            continue

        actor_details = _fetch_json(PERSON_ENDPOINT.format(person['id'], API_KEY))
        if actor_details is None:
            print("Actor details not found.")
            continue

        # Create actor JSON object
        actor = {
            'actor_id': actor_details['id'],
            'name': actor_details['name'],
            'gender': actor_details['gender'],
            'profile_path': actor_details['profile_path'],
            'birthday': actor_details['birthday'],
            'deathday': actor_details['deathday'],
            'department': actor_details['known_for_department'],
            'place_of_birth': actor_details['place_of_birth'],
            'popularity': actor_details['popularity'],
        }
        actors_list.append(actor)

        # "known_for" can also list TV shows; their ids are not valid for the
        # movie endpoint, so keep only movie entries. Entries that carry no
        # "media_type" at all are treated as movies, as before.
        known_movies = [
            entry for entry in person['known_for']
            if entry.get('media_type', 'movie') == 'movie'
        ]

        # Create movie JSON objects
        for movie in known_movies:
            movie_details = _fetch_json(MOVIE_DETAILS_ENDPOINT.format(movie['id'], API_KEY))
            if movie_details is None:
                continue

            movie_obj = {
                'movie_id': movie_details['id'],
                'title': movie_details['title'],
                'budget': movie_details['budget'],
                'original_language': movie_details['original_language'],
                'original_title': movie_details['original_title'],
                'overview': movie_details['overview'],
                'poster_path': movie_details['poster_path'],
                'genres': movie_details['genres'],
                'popularity': movie_details['popularity'],
                'release_date': movie_details['release_date'],
                'revenue': movie_details['revenue'],
                'vote_average': movie_details['vote_average'],
                'vote_count': movie_details['vote_count'],
                'production_companies': movie_details['production_companies'],
                'production_countries': movie_details['production_countries'],
            }
            movies_list.append(movie_obj)

        # Create acted JSON objects (one actor/movie link per known movie)
        for movie in known_movies:
            acted_obj = {
                'actor_id': actor_details['id'],
                'movie_id': movie['id']
            }
            acted_list.append(acted_obj)

    print(f"Data from page {page} has been collected successfully.")

    # Specify the directory path
    directory = f'../data/{page}'

    # Create the directory if it does not exist
    os.makedirs(directory, exist_ok=True)

    # Generate dataframes
    df_actors = pd.DataFrame(actors_list)
    df_movies = pd.DataFrame(movies_list)
    df_acted = pd.DataFrame(acted_list)

    # Save files as json type
    df_actors.to_json(orient='records', path_or_buf=f"{directory}/actors.json")
    df_movies.to_json(orient='records', path_or_buf=f"{directory}/movies.json")
    df_acted.to_json(orient='records', path_or_buf=f"{directory}/acted.json")

    print("Files saved successfully.")
    print("##################################")

    # Stop after the last page when the listing reports how many pages exist.
    total_pages = payload.get("total_pages")
    if total_pages is not None and page >= total_pages:
        break

    page = page + 1

print("Fetshing done.")

Fetshing start.
************* PAGE 1 **************
Files saved successfully.
##################################
************* PAGE 2 **************
Files saved successfully.
##################################
************* PAGE 3 **************
Empty or missing 'actor name' or 'known_for' field. Skipping sending to DB.
Files saved successfully.
##################################
************* PAGE 4 **************
Files saved successfully.
##################################
************* PAGE 5 **************
Files saved successfully.
##################################
************* PAGE 6 **************
Empty or missing 'actor name' or 'known_for' field. Skipping sending to DB.
Files saved successfully.
##################################
************* PAGE 7 **************
Empty or missing 'actor name' or 'known_for' field. Skipping sending to DB.
Files saved successfully.
##################################
************* PAGE 8 **************


KeyboardInterrupt: 