In [13]:
import os
import warnings

import requests
import pandas as pd
from tqdm import tqdm


# TMDB v3 API key, read from the environment so the secret is never stored in
# the notebook (or in its version history). Export TMDB_API_KEY before starting
# the kernel.
# NOTE(review): a literal key used to be committed on this line; treat that key
# as leaked and rotate it.
API_KEY = os.environ.get('TMDB_API_KEY', '')
if not API_KEY:
    # Warn up front: with an empty key every request is rejected and the
    # helpers below quietly return empty results.
    warnings.warn(
        'TMDB_API_KEY is not set; TMDB requests will fail and every lookup '
        'will come back empty.'
    )

def search_tmdb(show_name, timeout=10):
    """Query TMDB's ``search/multi`` endpoint for ``show_name``.

    The title is sent through ``params`` so that requests URL-encodes it.
    Interpolating it straight into the URL breaks titles that contain
    characters such as ``&``, ``#``, ``+`` or ``?`` (for example
    "love lust & confusion"), because everything after the special character
    is parsed as a different query parameter or dropped.

    Args:
        show_name: Title text to search for.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under ``results`` in the JSON response, or an empty
        list when the request fails, times out, or returns a non-200 status.
    """
    url = 'https://api.themoviedb.org/3/search/multi'
    params = {'api_key': API_KEY, 'query': show_name}

    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # Lookups are best-effort: a network failure is reported the same way
        # as "nothing found" so one bad request cannot abort a long batch run.
        return []

    if response.status_code == 200:
        return response.json().get('results', [])
    return []

def fetch_movie_details(tmdb_id, timeout=10):
    """Fetch the TMDB movie record for ``tmdb_id``, with credits and reviews.

    Args:
        tmdb_id: TMDB identifier of the movie.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection blocks the caller forever.

    Returns:
        The decoded JSON body as a dict, or an empty dict when the request
        fails, times out, or returns a non-200 status.
    """
    url = f'https://api.themoviedb.org/3/movie/{tmdb_id}?api_key={API_KEY}&append_to_response=credits,reviews'

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Best-effort lookup: a transport error is reported as "no details".
        return {}

    if response.status_code == 200:
        return response.json()
    return {}

def fetch_tv_details(tmdb_id, timeout=10):
    """Fetch the TMDB TV record for ``tmdb_id``, with credits and reviews.

    Args:
        tmdb_id: TMDB identifier of the TV show.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection blocks the caller forever.

    Returns:
        The decoded JSON body as a dict, or an empty dict when the request
        fails, times out, or returns a non-200 status.
    """
    url = f'https://api.themoviedb.org/3/tv/{tmdb_id}?api_key={API_KEY}&append_to_response=credits,reviews'

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Best-effort lookup: a transport error is reported as "no details".
        return {}

    if response.status_code == 200:
        return response.json()
    return {}

def names_match(name1, name2):
    """Return True when two titles are equal ignoring case and outer whitespace.

    Non-string inputs (``None``, or the float NaN that pandas yields for a
    blank cell) never match; they used to raise ``AttributeError`` on
    ``.strip()``.

    Args:
        name1: First title.
        name2: Second title.

    Returns:
        bool: True if both are strings that are equal after ``strip().lower()``.
    """
    if not isinstance(name1, str) or not isinstance(name2, str):
        return False
    return name1.strip().lower() == name2.strip().lower()

def extract_show_data(show_name):
    """Look up ``show_name`` on TMDB and flatten the top hit into one record.

    Only the first search result is considered, and it is accepted only when
    its title equals ``show_name`` ignoring case and surrounding whitespace
    (see ``names_match``). When nothing usable is found, the record is
    returned with every field except 'Original Title' left as ``None``.

    Args:
        show_name: Title to look up. Non-string or blank values (e.g. the NaN
            pandas produces for an empty cell) yield an empty record without
            any network request.

    Returns:
        dict: Always has the same keys. 'Genres', 'Directors', 'Cast' and
        'Writers' are comma-joined strings, and may be empty strings.
        'Cast' is limited to the first 10 entries. 'Awards' is never
        populated by this function.
    """
    show_data = {
        'Original Title': show_name,
        'TMDB ID': None,
        'Title': None,
        'Year': None,
        'Genres': None,
        'Directors': None,
        'Cast': None,
        'Writers': None,
        'Rating': None,
        'Plot': None,
        'Awards': None,
        'Seasons': None,
        'Reviews': None,
        'Revenue': None
    }

    # A missing or blank name cannot match anything; skip the API call.
    # Previously a NaN name crashed inside names_match.
    if not isinstance(show_name, str) or not show_name.strip():
        return show_data

    search_results = search_tmdb(show_name)
    if not search_results:
        return show_data

    best_match = search_results[0]
    media_type = best_match.get('media_type', 'movie')

    # search/multi can return non-title hits (such as people). Those ids are
    # not movie or TV ids, so sending them to the TV endpoint would attach an
    # unrelated show to this row.
    if media_type not in ('movie', 'tv'):
        return show_data

    best_match_name = best_match.get('title') or best_match.get('name') or ''
    if not names_match(show_name, best_match_name):
        return show_data

    tmdb_id = best_match['id']
    show_data['TMDB ID'] = tmdb_id

    if media_type == 'movie':
        details = fetch_movie_details(tmdb_id)
    else:
        details = fetch_tv_details(tmdb_id)
    if not details:
        return show_data

    # `or` fallbacks also cover keys that are present but null.
    credits = details.get('credits') or {}
    crew = credits.get('crew') or []
    cast = credits.get('cast') or []
    genres = details.get('genres') or []
    # Use 'release_date' when set, otherwise 'first_air_date'.
    date = details.get('release_date') or details.get('first_air_date')

    show_data['Title'] = details.get('title') or details.get('name')
    show_data['Year'] = date[:4] if date else None
    show_data['Genres'] = ', '.join(genre['name'] for genre in genres)
    show_data['Directors'] = ', '.join(
        member['name'] for member in crew if member.get('job') == 'Director'
    )
    show_data['Cast'] = ', '.join(member['name'] for member in cast[:10])
    show_data['Writers'] = ', '.join(
        member['name'] for member in crew
        if member.get('job') in ('Writer', 'Screenplay')
    )
    show_data['Rating'] = details.get('vote_average')
    show_data['Plot'] = details.get('overview')
    show_data['Revenue'] = details.get('revenue')

    seasons = details.get('seasons')
    if seasons is not None:
        show_data['Seasons'] = len(seasons)

    reviews = (details.get('reviews') or {}).get('results') or []
    show_data['Reviews'] = ', '.join(review['content'] for review in reviews) if reviews else None

    return show_data


# Load the input catalogue; the lookups below are driven by its 'name' column.
data = pd.read_csv("final_data.csv")
show_names = data['name']

# One TMDB lookup per input row, in input order, with a progress bar.
shows_data = [
    extract_show_data(name)
    for name in tqdm(show_names, desc="Processing shows")
]

# Each record is a flat dict with identical keys, so they map onto columns.
df = pd.DataFrame(shows_data)

df

Processing shows: 100%|█████████████████| 11491/11491 [1:12:29<00:00,  2.64it/s]


Unnamed: 0,Original Title,TMDB ID,Title,Year,Genres,Directors,Cast,Writers,Rating,Plot,Awards,Seasons,Reviews,Revenue
0,english pesunaalum tamizhan da,,,,,,,,,,,,,
1,ultimate secrets of luck,,,,,,,,,,,,,
2,jab mila tu,244288.0,Jab Mila Tu,2024,Drama,Lalit Mohan,"Pratik Sehajpal, Mohsin Khan, Alisha Chopra, E...",,0.0,When Maddy and Aneri find themselves sharing a...,,1.0,,
3,love lust & confusion,,,,,,,,,,,,,
4,deepavali specials telugu 2022,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11486,india on film,,,,,,,,,,,,,
11487,sankellu,,,,,,,,,,,,,
11488,mangalyam thanthunaane,,,,,,,,,,,,,
11489,el presidente: corruption game,,,,,,,,,,,,,


In [15]:
# Persist the enriched table; the DataFrame index is not written to the file.
df.to_csv(path_or_buf="TMDB.csv", index=False)