In [78]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [79]:
# Download the bookstore page.
# - timeout: without one, requests waits indefinitely on a stalled connection.
# - raise_for_status(): fail loudly on 4xx/5xx instead of handing an error
#   page to the HTML parser as if it were the book listing.
BOOKSTORE_URL = 'https://btech-data-analytics.github.io/bridgerland-technical-college/bookstore.html'
page = requests.get(BOOKSTORE_URL, timeout=30)
page.raise_for_status()
response = page.text  # `response` remains the raw HTML text (a str)

In [80]:
# Build a searchable parse tree from the downloaded HTML text.
soup = BeautifulSoup(
    markup=response,
    features='html.parser',
)

In [88]:
# Exploratory dump: print the <td> text of every row in the first <table>
# on the page so the column layout can be inspected.
table = soup.find("table")
if table is not None:
    rows = table.find_all("tr")
    for i, row in enumerate(rows):
        # Only <td> cells are collected, so a row without any yields [].
        cells = []
        for td in row.find_all("td"):
            cells.append(td.get_text(strip=True))
        print(f"Row {i}: {cells}")
else:
    print("No table found.")

Row 0: []
Row 1: ['978-1234567890', 'Whiskers of Wisdom: Tales from Feline Philosophers', 'Penelope Wainwright', 'English', '256', 'Cats', '$19.99', 'Buy now']
Row 2: ['978-2345678901', "Purrfectly Pawesome: A Cat's Life", 'Jasper Sterling', 'English', '192', 'Cats', '$15.99', 'Buy now']
Row 3: ['978-3456789012', 'Cat Tales: Adventures in Whiskerland', 'Penelope Wainwright', 'English', '320', 'Cats', '$21.99', 'Buy now']
Row 4: ['978-4567890123', 'The Enigmatic Paws: Mysteries of Meowville', 'Maximilian Thorne', 'English', '288', 'Cats', '$17.99', 'Buy now']
Row 5: ['978-5678901234', 'Cats in Wonderland', 'Isadora Harrington', 'English', '224', 'Cats', '$16.99', 'Buy now']
Row 6: ['978-6789012345', 'Whisker Wisdom: Life Lessons from Feline Sages', 'Penelope Wainwright', 'English', '288', 'Cats', '$20.99', 'Buy now']
Row 7: ['978-7890123456', 'Catnip Chronicles: A Purrfect Journey', 'Jasper Sterling', 'English', '192', 'Cats', '$14.99', 'Buy now']
Row 8: ['978-8901234567', 'Cat-astrophe

In [93]:
# Gather the text of the third <td> of every row that has one, then list
# the distinct values in sorted order.
rows = soup.find_all('tr')

authors = [
    tds[2].get_text(strip=True)
    for tds in (tr.find_all('td') for tr in rows)
    if len(tds) > 2  # skip rows with fewer than three <td> cells
]

unique_authors = sorted(set(authors))

# One value per line, under the heading.
print("Authors:", *unique_authors, sep="\n")

Authors:
Benjamin Barkley
Celeste Nightshade
Isadora Harrington
Jasper Sterling
Maximilian Thorne
Oliver Obedience
Penelope Wainwright
Ruby Ruffington
Seraphina Montague
Sophie Shepherd


In [94]:
# Gather the text of the sixth <td> of every row that has one, then list
# the distinct values in sorted order. Reuses `rows` from the earlier cell.
topics = []
for tr in rows:
    tds = tr.find_all('td')
    if len(tds) <= 5:
        # Row has fewer than six <td> cells; nothing to read.
        continue
    topics.append(tds[5].get_text(strip=True))

unique_topics = sorted(set(topics))

# One value per line, under the heading.
print("Topics:", *unique_topics, sep="\n")

Topics:
Cats
Dogs


In [96]:
# Turn every <tr> that contains at least one <td> into a list of cell
# strings, then load those records into a DataFrame with named columns.
columns = ['ISBN', 'Title', 'Author', 'Language', 'Pages', 'Topic', 'Price', 'Action']

row_texts = (
    [cell.get_text(strip=True) for cell in row.find_all('td')]
    for row in soup.find_all('tr')
)
data = [cells for cells in row_texts if cells]  # drop rows with no <td> cells

df = pd.DataFrame(data, columns=columns)

In [100]:
# Count books per author, then report the author with the highest count.
author_counts = df['Author'].value_counts()
top_author, count = author_counts.idxmax(), author_counts.max()

print(f"{top_author} has the most books ({count})")

Penelope Wainwright has the most books (4)


In [101]:
# Count books per topic, then report the topic with the highest count.
topic_counts = df['Topic'].value_counts()
top_topic = topic_counts.idxmax()
count = topic_counts.max()

print(f"{top_topic} is the most popular topic ({count} books)")

Cats is the most popular topic (10 books)


In [103]:
# Convert the price strings to floats by removing '$' signs and commas.
# The pattern must be a raw string: in a plain string literal '\$' is an
# invalid escape sequence, which newer Python versions warn about
# (SyntaxWarning on 3.12+). The regex itself is unchanged.
df['Price'] = df['Price'].replace(r'[\$,]', '', regex=True).astype(float)

# Mean price per topic.
avg_prices = df.groupby('Topic')['Price'].mean()

# Topic with the highest mean price, and that mean.
most_expensive_topic = avg_prices.idxmax()
highest_avg = avg_prices.max()

print(f"{most_expensive_topic} is the most expensive topic on average (${highest_avg:.2f})")

Dogs is the most expensive topic on average ($26.59)


In [104]:
# Make the page counts numeric; values that cannot be parsed become NaN
# (errors='coerce') rather than raising.
df['Pages'] = pd.to_numeric(df['Pages'], errors='coerce')

# Mean page count per topic.
pages_by_topic = df.groupby('Topic')['Pages']
avg_pages = pages_by_topic.mean()

# Topic with the highest mean page count, and that mean.
most_pages_topic, highest_avg = avg_pages.idxmax(), avg_pages.max()

print(f"{most_pages_topic} books have the most pages on average ({highest_avg:.0f} pages)")

Dogs books have the most pages on average (256 pages)
