In [1]:
# Import Libraries

from bs4 import BeautifulSoup
import requests
import openpyxl

In [2]:
# Start from a brand-new, empty workbook
excel = openpyxl.Workbook()

# A fresh workbook comes with a single default sheet; show its name
print(excel.sheetnames)

# Take the active worksheet and give it a descriptive title
sheet = excel.active
sheet.title = 'Top 250 TV Shows'

# Confirm that the rename took effect
print(excel.sheetnames)

# Write the header row: one label for each of the 4 columns
header_row = ['TV Show Rank', 'TV Show Name', 'Year of Release', 'IMDb Rating']
sheet.append(header_row)

['Sheet']
['Top 250 TV Shows']


In [3]:
# Scrape the IMDb "Top 250 TV Shows" chart and store one row per show
# (rank, name, release year, rating) in the worksheet, then save the workbook.

# Setup the URL

URL = "https://www.imdb.com/chart/toptv/?ref_=nv_tvv_250"

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on an unresponsive connection.
REQUEST_TIMEOUT = 30

# Rows collected during this run. They are gathered here first so the sheet
# is only modified once scraping has finished.
scraped_rows = []

try:

    page = requests.get(URL, timeout=REQUEST_TIMEOUT)

    # Error will be thrown if the server answered with an HTTP error status

    page.raise_for_status()

    # Getting the html text from the URL and parse it

    soup = BeautifulSoup(page.text, 'html.parser')

    # Accessing the html that contains the information that is needed.
    # find() returns None when nothing matches, so check before chaining.

    table_body = soup.find('tbody', class_="lister-list")

    if table_body is None:
        # NOTE(review): presumably the page layout changed or the response
        # was not the expected chart page - confirm by inspecting page.text.
        print("Could not find the 'lister-list' table in the page; no TV shows were scraped.")
        tv_shows = []
    else:
        tv_shows = table_body.find_all('tr')

    # Running a for loop to collect the information for each of the tv shows

    for tv_show in tv_shows:

        try:
            # The rank, name and release year all live in the same cell,
            # so look it up once per row.
            title_cell = tv_show.find('td', class_="titleColumn")

            # Collecting the rank of the tv show (the text before the first '.')
            rank = title_cell.get_text(strip=True).split('.')[0]

            # Collecting the name of the tv show
            name = title_cell.a.text

            # Collecting the release year of the tv show, without the parentheses
            release_year = title_cell.span.text.strip('()')

            # Collecting the IMDb rating of the tv show
            rating = tv_show.find('td', class_="ratingColumn imdbRating").strong.text

        except AttributeError as e:
            # A missing tag makes find()/attribute access yield None; skip
            # just this row instead of abandoning every remaining show.
            print("Skipping a row that could not be parsed:", e)
            continue

        scraped_rows.append([rank, name, release_year, rating])

except requests.RequestException as e:
    # Connection problems, timeouts and HTTP error statuses end up here
    print("Could not download the page:", e)

# Adding the information collected from the tv shows into the excel sheet.
# Rows left over from an earlier run of this cell are removed first, so that
# re-running the cell does not append a second copy of every show. When
# nothing was scraped the sheet is left exactly as it was.

if scraped_rows:
    if sheet.max_row > 1:
        # Row 1 is the header; delete everything below it
        sheet.delete_rows(2, sheet.max_row - 1)

    for row in scraped_rows:
        sheet.append(row)

print(len(scraped_rows), "TV shows scraped in this run")

# Saving my excel file

excel.save('Resources/IMDb_Top_250_TV_Shows.xlsx')