In [1]:
import csv
import os

import requests
from bs4 import BeautifulSoup

In [2]:

# Read CSV file
def read_csv(filename, separator):
    """Load a delimited text file into a list of row dictionaries.

    The file is opened as UTF-8 text and parsed with ``csv.DictReader``,
    so the first line supplies the keys and every following line becomes
    one dictionary.

    Args:
        filename: Path of the file to read.
        separator: Single-character field delimiter (e.g. ``'\\t'``).

    Returns:
        A list with one dict per data row, in file order.
    """
    with open(filename, 'r', newline='', encoding='utf-8') as handle:
        return list(csv.DictReader(handle, delimiter=separator))

# Usage: load the tab-separated Netflix export into a list of row dicts.
# `separator` and `netflix_data` are reused by later cells.
filename = 'netflix_country.csv'
separator = '\t'
netflix_data = read_csv(filename, separator)

# Report how many rows were loaded.
print(f"Read {len(netflix_data)} rows from the CSV file.")

Read 830 rows from the CSV file.


In [3]:
# Find rows with missing country information
def find_missing_countries(data):
    """Collect the rows whose ``'country'`` field is empty.

    A field counts as empty when it is falsy (``None`` or ``''``) or
    contains only whitespace.

    Args:
        data: Sequence of row dicts, each having a ``'country'`` key.

    Returns:
        A list of ``(position, row)`` tuples, where ``position`` is the
        row's index in ``data`` and ``row`` is the same dict object.
    """
    def _is_blank(value):
        # Falsy check first so None never reaches .strip().
        return not value or value.strip() == ''

    return [
        (position, record)
        for position, record in enumerate(data)
        if _is_blank(record['country'])
    ]

# Usage: collect (index, row) pairs for every row lacking a country.
# The indices point back into netflix_data so rows can be updated later.
missing_country_data = find_missing_countries(netflix_data)

# Report how many rows need a country lookup.
print(f"Found {len(missing_country_data)} entries with missing country information.")

Found 830 entries with missing country information.


In [4]:
# Fetch country information from TMDB
def get_country_from_tmdb(title, type_, api_key, timeout=10):
    """Look up a title's production countries through the TMDB API.

    Runs a TMDB search for ``title`` (the movie endpoint when ``type_`` is
    ``'Movie'``, the TV endpoint for anything else), takes the first search
    hit, fetches its details record and reads ``production_countries``.

    This is a best-effort lookup: any failure results in ``None`` rather
    than an exception, so one bad request cannot abort a long batch run.

    Args:
        title: Title text to search for. Empty/None returns ``None``
            without contacting the API.
        type_: ``'Movie'`` selects the movie endpoints; any other value
            selects the TV endpoints.
        api_key: TMDB API key sent as the ``api_key`` query parameter.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The country names joined with ``", "``, or ``None`` when the title
        is empty, a request fails or times out, a response is not HTTP 200
        or not valid JSON, the search has no results, or the details record
        lists no production countries.
    """
    if not title:
        return None

    media_kind = 'movie' if type_ == 'Movie' else 'tv'
    base_url = "https://api.themoviedb.org/3"

    try:
        search = requests.get(
            f"{base_url}/search/{media_kind}",
            params={'api_key': api_key, 'query': title},
            timeout=timeout,
        )
        if search.status_code != 200:
            return None

        results = search.json().get('results')
        if not results:
            return None

        # Assuming the first result is the correct one
        tmdb_id = results[0]['id']
        details = requests.get(
            f"{base_url}/{media_kind}/{tmdb_id}",
            params={'api_key': api_key},
            timeout=timeout,
        )
        if details.status_code != 200:
            return None

        countries = details.json().get('production_countries')
    except (requests.RequestException, ValueError):
        # RequestException: connection errors, timeouts, etc.
        # ValueError: response body was not valid JSON.
        return None

    if not countries:
        return None

    # Concatenate country names if there are multiple
    return ", ".join(country['name'] for country in countries)

# Usage
# The TMDB key is a credential, so it is read from the environment rather
# than stored in the notebook. Any key previously committed here should be
# treated as leaked and rotated.
api_key = os.environ.get('TMDB_API_KEY')
if not api_key:
    raise RuntimeError("Set the TMDB_API_KEY environment variable before running this cell.")

# Back-fill the country on netflix_data (in place) for every row that was
# flagged as missing, reporting the outcome per title.
for index, row in missing_country_data:
    country = get_country_from_tmdb(row['title'], row['type'], api_key)
    if country:
        netflix_data[index]['country'] = country
        print(f"Updated country for {row['title']} to {country}")
    else:
        print(f"Could not find country for {row['title']}")

Could not find country for Vendetta: Truth, Lies and The Mafia
Could not find country for Luv Kushh
Could not find country for How to Be a Cowboy
Updated country for Titletown High to United States of America
Updated country for Motel Makeover to Canada
Updated country for Oggy Oggy to France, Canada, Germany
Could not find country for Top Secret UFO Projects: Declassified
Could not find country for Khawatir
Updated country for Myth & Mogul: John DeLorean to United States of America
Updated country for Tattoo Redo to United States of America
Updated country for Sexy Beasts to United Kingdom
Could not find country for Heist
Updated country for Elize Matsunaga: Once Upon a Crime to Brazil
Could not find country for Cat People
Updated country for Sisters on Track to United States of America
Updated country for This Is Pop to United States of America
Updated country for Penguin Town to South Africa
Could not find country for Human: The World Within
Updated country for High on the Hog: How 

In [5]:
# Save the updated data to CSV
import os

def save_csv(filename, data, separator, output_dir='/mnt/data'):
    """Write row dictionaries to a delimited text file.

    The file is written into ``output_dir`` when that directory exists;
    otherwise a notice is printed and the current working directory is
    used instead. The final path is printed once the file is written.

    Args:
        filename: Output file name; ``'.csv'`` is appended if missing.
        data: List of row dicts. Column order comes from the keys of the
            first row. When empty, an empty file is created.
        separator: Single-character field delimiter.
        output_dir: Preferred directory for the output file.

    Raises:
        ValueError: If a row contains keys absent from the first row
            (raised by ``csv.DictWriter``).
    """
    if not filename.endswith('.csv'):
        filename += '.csv'

    target_dir = output_dir
    # isdir (not exists) so that a regular file with this name also
    # triggers the fallback instead of failing at open().
    if not os.path.isdir(target_dir):
        target_dir = os.getcwd()
        print(f"Directory '{output_dir}' not found. Saving to current directory: {target_dir}")

    output_path = os.path.join(target_dir, filename)

    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        if data:
            writer = csv.DictWriter(
                csvfile,
                fieldnames=list(data[0].keys()),
                delimiter=separator,
            )
            writer.writeheader()
            writer.writerows(data)

    print(f"Updated data saved to {output_path}")

# Usage: write the updated rows back out using the same delimiter as the
# input file; save_csv appends the '.csv' extension to this name.
output_filename = 'updated_netflix_country'
save_csv(output_filename, netflix_data, separator)


Directory '/mnt/data' not found. Saving to current directory: /content
Updated data saved to /content/updated_netflix_country.csv
