In [1]:
import csv
import getpass
import os

import requests
from bs4 import BeautifulSoup



In [2]:
def read_csv(filename, separator):
    """Load a delimited text file into a list of row dictionaries.

    Args:
        filename: Path of the file to read (opened as UTF-8 text).
        separator: Single-character field delimiter, e.g. a tab or a comma.

    Returns:
        A list with one dict per data row, keyed by the names found in the
        file's header line.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='', encoding='utf-8') as source:
        return list(csv.DictReader(source, delimiter=separator))

# Usage: load the tab-separated cast file and report how many rows it holds
filename, separator = 'netflix_cast.csv', '\t'
netflix_data = read_csv(filename=filename, separator=separator)

print(f"Read {len(netflix_data)} rows from the CSV file.")




Read 825 rows from the CSV file.


In [3]:
def find_missing_cast(data):
    """Return the rows whose 'cast_media' field is empty or whitespace-only.

    Args:
        data: Sequence of row dicts, each containing a 'cast_media' key.

    Returns:
        A list of (position, row) tuples, where position is the row's index
        in ``data`` and row is the same dict object found there.
    """
    def _is_blank(value):
        # Falsy values (None, '') count as blank, as do all-whitespace strings.
        return not value or value.strip() == ''

    return [
        (position, record)
        for position, record in enumerate(data)
        if _is_blank(record['cast_media'])
    ]

# Usage: gather (index, row) pairs for rows with a blank cast column, then report the count
missing_cast_data = find_missing_cast(data=netflix_data)

print(f"Found {len(missing_cast_data)} entries with missing cast information.")




Found 825 entries with missing cast information.


In [4]:
def get_cast_from_tmdb(title, type_, api_key, timeout=10, max_cast=5):
    """Look up the leading cast members of a title via the TMDB REST API.

    Two requests are made: a search by title, then a credits lookup for the
    first search hit.

    Args:
        title: Title text sent as the search query.
        type_: 'Movie' selects the movie endpoints; any other value selects
            the TV endpoints.
        api_key: TMDB API key sent with both requests.
        timeout: Seconds to wait on each HTTP request before giving up.
        max_cast: Maximum number of cast names to include in the result.

    Returns:
        A comma-separated string of up to ``max_cast`` cast names, or None
        when the search has no results, either request fails (non-200 status,
        network error, timeout, unparsable body), or the credits list no cast.
    """
    media_kind = 'movie' if type_ == 'Movie' else 'tv'
    base_url = 'https://api.themoviedb.org/3'

    try:
        search_response = requests.get(
            f"{base_url}/search/{media_kind}",
            params={'api_key': api_key, 'query': title},
            timeout=timeout,  # without a timeout a stalled connection blocks forever
        )
        if search_response.status_code != 200:
            return None

        results = search_response.json().get('results')
        if not results:
            return None

        # Assuming the first result is the correct one
        media_id = results[0]['id']
        credits_response = requests.get(
            f"{base_url}/{media_kind}/{media_id}/credits",
            params={'api_key': api_key},
            timeout=timeout,
        )
        if credits_response.status_code != 200:
            return None

        cast = credits_response.json().get('cast')
    except (requests.RequestException, ValueError):
        # HTTP error statuses already map to None above; treat transport
        # failures and malformed JSON the same way so that one bad request
        # does not abort a long batch of lookups.
        return None

    if not cast:
        return None
    return ', '.join(member['name'] for member in cast[:max_cast])

# Usage
# The TMDB key is a credential, so it is read from the TMDB_API_KEY environment
# variable (or prompted for without echo) instead of being stored in the notebook.
# NOTE(review): the key that used to be embedded here has been shared with the
# notebook and should be revoked/rotated.
api_key = os.environ.get('TMDB_API_KEY') or getpass.getpass('TMDB API key: ')

# Fill in the cast column, in place, for every row that was found to be missing it.
for index, row in missing_cast_data:
    cast = get_cast_from_tmdb(row['title'], row['type'], api_key)
    if cast:
        netflix_data[index]['cast_media'] = cast
        print(f"Updated cast for {row['title']} to {cast}")
    else:
        print(f"Could not find cast for {row['title']}")



Could not find cast for Jailbirds New Orleans
Updated cast for Vendetta: Truth, Lies and The Mafia to Silvana Saguto, Pino Maniaci
Updated cast for Raja Rasoi Aur Anya Kahaniyan to Manwendra Tripathy, Dr Pushpesh Pant
Updated cast for Stories by Rabindranath Tagore to Radhika Apte
Could not find cast for Luv Kushh
Updated cast for Pororo - The Little Penguin to Lee Sun, Hong So-young, Lee Mi-ja, Ham Su-jung, Koo Ja-hyoung
Could not find cast for How to Be a Cowboy
Could not find cast for Turning Point: 9/11 and the War on Terror
Updated cast for Titletown High to Jake Garcia, Lenley Gross, Kendall Haden, Amari Jones, Samuel Brown
Updated cast for Motel Makeover to April Brown, Sarah Sklash
Updated cast for Oggy Oggy to Kaycie Chase
Updated cast for Comedy Premium League to Rahul Subramanian, Prajakta Koli, Tanmay Bhat, Rytasha Rathore, Aadar Malik
Could not find cast for Mother Goose Club
Updated cast for Bake Squad to Christina Tosi, Maya-Camille Broussard, Ashley Holt, Gonzo Jimenez,

In [5]:
# Proceed to save the updated data back to the CSV


import os

def save_csv(filename, data, separator, output_dir='/mnt/data'):
    """Write a list of row dictionaries to a delimited text file.

    Args:
        filename: Output file name; '.csv' is appended if it is missing.
        data: List of row dicts. The keys of the first row define the header
            and column order. When empty, an empty file is created.
        separator: Single-character field delimiter.
        output_dir: Directory to write into. If it does not exist, the file is
            written to the current working directory instead and a notice is
            printed. Defaults to '/mnt/data'.

    Returns:
        None. The final output path is printed.
    """
    # Ensure the filename has the correct extension
    if not filename.endswith('.csv'):
        filename += '.csv'

    # Fall back to the current working directory when the requested one is absent
    target_dir = output_dir
    if not os.path.exists(target_dir):
        target_dir = os.getcwd()
        print(f"Directory '{output_dir}' not found. Saving to current directory: {target_dir}")

    output_path = os.path.join(target_dir, filename)

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        if data:
            writer = csv.DictWriter(csvfile, fieldnames=list(data[0]), delimiter=separator)
            writer.writeheader()
            writer.writerows(data)

    print(f"Updated data saved to {output_path}")

# Usage: write the updated rows to 'updated_netflix_cast.csv' with the same delimiter used for reading
output_filename = 'updated_netflix_cast'
save_csv(filename=output_filename, data=netflix_data, separator=separator)


Directory '/mnt/data' not found. Saving to current directory: /content
Updated data saved to /content/updated_netflix_cast.csv
