In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

def scrape_imdb_counts(imdb_id):
    imdb_url = f'https://www.imdb.com/title/{imdb_id}/'
    response = requests.get(imdb_url)
    
    if response.status_code != 200:
        print(f"Failed to retrieve data for {imdb_id}")
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the section that contains the season and episode information
    try:
        # Example selectors, might need adjustments
        seasons_element = soup.find('a', href=True, text='Seasons')
        if not seasons_element:
            print(f"No seasons found for {imdb_id}")
            return None, None
        
        # Find number of seasons
        seasons = len(soup.select('.seasons-and-year-nav a[href*="/season/"]'))

        # Find total episodes across all seasons
        episodes = 0
        for season in soup.select('.seasons-and-year-nav a[href*="/season/"]'):
            season_url = f"https://www.imdb.com{season['href']}"
            season_response = requests.get(season_url)
            season_soup = BeautifulSoup(season_response.text, 'html.parser')
            episode_count = len(season_soup.select('.episode'))
            episodes += episode_count

        return seasons, episodes
    except Exception as e:
        print(f"Error parsing IMDb data: {e}")
        return None, None


df = pd.read_csv('final_data.csv')


shows_dict = {}

for index, row in df.iterrows():
    show_name = row['name']
    
    
    imdb_id = row['imdb_id']
    seasons, episodes = scrape_imdb_counts(imdb_id)
    
    if seasons is not None:
        show_details = {
            'title': show_name,  
            'total_seasons': seasons,
            'total_episodes': episodes
        }

        shows_dict[show_name] = show_details

# Save the data to a JSON file
with open('imdb_shows_counts.json', 'w', encoding='utf-8') as json_file:
    json.dump(shows_dict, json_file, ensure_ascii=False, indent=4)

print("Details saved to 'imdb_shows_counts.json'")


KeyError: 'imdb_id'