In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape the first page of quotes.toscrape.com into a DataFrame with one row
# per quote and the columns "text", "author" and "tags".
url = "https://quotes.toscrape.com/"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an empty table.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Every lookup below is scoped to one <div class="quote"> container, so the
# three lists stay aligned row-for-row.
quote_elements = soup.find_all('div', class_='quote')
text = [quote.find(class_='text').get_text() for quote in quote_elements]
author = [quote.find(class_='author').get_text() for quote in quote_elements]
# A quote can carry several tags; they are flattened into one comma-separated string.
tags = [', '.join(tag.get_text() for tag in quote.find_all('a', class_='tag')) for quote in quote_elements]

data = pd.DataFrame({
    "text": text,
    "author": author,
    "tags": tags
})
data



Unnamed: 0,text,author,tags
0,“The world as we have created it is a process ...,Albert Einstein,"change, deep-thoughts, thinking, world"
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"abilities, choices"
2,“There are only two ways to live your life. On...,Albert Einstein,"inspirational, life, live, miracle, miracles"
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"aliteracy, books, classic, humor"
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"be-yourself, inspirational"
5,“Try not to become a man of success. Rather be...,Albert Einstein,"adulthood, success, value"
6,“It is better to be hated for what you are tha...,André Gide,"life, love"
7,"“I have not failed. I've just found 10,000 way...",Thomas A. Edison,"edison, failure, inspirational, paraphrased"
8,“A woman is like a tea bag; you never know how...,Eleanor Roosevelt,misattributed-eleanor-roosevelt
9,"“A day without sunshine is like, you know, nig...",Steve Martin,"humor, obvious, simile"


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pprint

# Crawl every page of quotes.toscrape.com with headless Chrome, following the
# "Next" link until it disappears, and collect one dict per quote.

# --- Setup ---
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options)

quotes_list = []

# try/finally guarantees the browser process is shut down even when a wait
# times out or an element lookup fails partway through the crawl.
try:
    wait = WebDriverWait(driver, 10)

    driver.get("https://quotes.toscrape.com/")

    while True:
        # Wait until quotes are present on the current page
        quotes_elements = wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".quote"))
        )

        # Extract from the current page
        for q in quotes_elements:
            text = q.find_element(By.CLASS_NAME, "text").text
            author = q.find_element(By.CLASS_NAME, "author").text
            tags = [t.text for t in q.find_elements(By.CLASS_NAME, "tag")]
            quotes_list.append({"author": author, "text": text, "tags": tags})

        # Check if there is a Next button
        next_links = driver.find_elements(By.CSS_SELECTOR, ".pager .next a")
        if not next_links:
            break  # No more pages

        # Click Next and wait for the old content to become stale (ensures navigation completed)
        first_quote_on_page = quotes_elements[0]
        next_links[0].click()
        wait.until(EC.staleness_of(first_quote_on_page))  # page changed
finally:
    driver.quit()

# Show results
pprint.pprint(quotes_list)


[{'author': 'Albert Einstein',
  'tags': ['change', 'deep-thoughts', 'thinking', 'world'],
  'text': '“The world as we have created it is a process of our thinking. It '
          'cannot be changed without changing our thinking.”'},
 {'author': 'J.K. Rowling',
  'tags': ['abilities', 'choices'],
  'text': '“It is our choices, Harry, that show what we truly are, far more '
          'than our abilities.”'},
 {'author': 'Albert Einstein',
  'tags': ['inspirational', 'life', 'live', 'miracle', 'miracles'],
  'text': '“There are only two ways to live your life. One is as though '
          'nothing is a miracle. The other is as though everything is a '
          'miracle.”'},
 {'author': 'Jane Austen',
  'tags': ['aliteracy', 'books', 'classic', 'humor'],
  'text': '“The person, be it gentleman or lady, who has not pleasure in a '
          'good novel, must be intolerably stupid.”'},
 {'author': 'Marilyn Monroe',
  'tags': ['be-yourself', 'inspirational'],
  'text': "“Imperfection is bea

In [3]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, re

# Collects one record (dict) per scraped movie; filled by the scraping loop later in this cell.
data = []

def digits_or_none(s):
    """Return the first run of digits found in ``s``, or None.

    Args:
        s: Text to search. Any falsy value (``None``, ``""``) yields None
            without searching.

    Returns:
        The first contiguous sequence of digits as a string (not an int),
        or None when ``s`` is falsy or contains no digit.
    """
    match = re.search(r"\d+", s) if s else None
    if match is None:
        return None
    return match.group(0)

# Scrape title, audience score and release date for every movie tile on the
# Rotten Tomatoes browse page below, printing each one and appending it to `data`.

# Imported here so the cell stays self-contained; it lets the optional lookups
# catch Selenium failures only, instead of using a bare `except:`.
from selenium.common.exceptions import WebDriverException

options = webdriver.ChromeOptions()
options.add_argument('--headless=new')
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options)

url = "https://www.rottentomatoes.com/browse/movies_at_home/affiliates:netflix~critics:certified_fresh"

# try/finally guarantees the browser process is shut down even if a lookup raises.
try:
    driver.get(url)
    # Fixed pause before querying; presumably gives the page time to render.
    time.sleep(2)

    movies = driver.find_elements(By.CSS_SELECTOR, '[data-qa="discovery-media-list-item"]')

    for movie in movies:
        # TITLE (required: a tile without a title raises)
        title_el = movie.find_element(By.CSS_SELECTOR, '[data-qa="discovery-media-list-item-title"]')
        title = title_el.text.strip()

        # AUDIENCE SCORE from Shadow DOM (optional: None when it cannot be read)
        try:
            aud_host = movie.find_element(By.CSS_SELECTOR, 'rt-text[slot="audienceScore"]')
            aud_shadow = aud_host.shadow_root
            aud_span = aud_shadow.find_element(By.CSS_SELECTOR, "span")
            audience_score = digits_or_none(aud_span.text)
        except WebDriverException:
            audience_score = None

        # RELEASE DATE (optional: "N/A" when the element is missing)
        try:
            release_date = movie.find_element(By.CSS_SELECTOR, 'span.smaller').text.strip()
        except WebDriverException:
            release_date = "N/A"

        # Avoid printing the misleading "None%" when no score was found.
        score_label = f"{audience_score}%" if audience_score is not None else "N/A"

        print(f"Title: {title}")
        print(f"Audience Score: {score_label}")
        print(f"Release Date: {release_date}")
        print("-------------")

        data.append({
            "Title": title,
            "Audience Score (%)": audience_score,
            "Release Date": release_date
        })
finally:
    driver.quit()




Title: A HOUSE OF DYNAMITE
Audience Score: 78%
Release Date: Streaming Oct 24, 2025
-------------
Title: The Perfect Neighbor
Audience Score: 79%
Release Date: Streaming Oct 17, 2025
-------------
Title: 28 Years Later
Audience Score: 63%
Release Date: Streaming Jul 29, 2025
-------------
Title: KPop Demon Hunters
Audience Score: 99%
Release Date: Streaming Jun 20, 2025
-------------
Title: His House
Audience Score: 73%
Release Date: Streaming Oct 30, 2020
-------------
Title: Host
Audience Score: 71%
Release Date: Streaming Jul 30, 2020
-------------
Title: Under the Shadow
Audience Score: 71%
Release Date: Streaming Jan 17, 2017
-------------
Title: Steve
Audience Score: 67%
Release Date: Streaming Oct 3, 2025
-------------
Title: The Thursday Murder Club
Audience Score: 53%
Release Date: Streaming Aug 28, 2025
-------------
Title: Past Lives
Audience Score: 93%
Release Date: Streaming Aug 22, 2023
-------------
Title: Beetlejuice Beetlejuice
Audience Score: 78%
Release Date: Streami

In [4]:
# Display the list of movie records collected by the scraping cell above.
data

[{'Title': 'A HOUSE OF DYNAMITE',
  'Audience Score (%)': '78',
  'Release Date': 'Streaming Oct 24, 2025'},
 {'Title': 'The Perfect Neighbor',
  'Audience Score (%)': '79',
  'Release Date': 'Streaming Oct 17, 2025'},
 {'Title': '28 Years Later',
  'Audience Score (%)': '63',
  'Release Date': 'Streaming Jul 29, 2025'},
 {'Title': 'KPop Demon Hunters',
  'Audience Score (%)': '99',
  'Release Date': 'Streaming Jun 20, 2025'},
 {'Title': 'His House',
  'Audience Score (%)': '73',
  'Release Date': 'Streaming Oct 30, 2020'},
 {'Title': 'Host',
  'Audience Score (%)': '71',
  'Release Date': 'Streaming Jul 30, 2020'},
 {'Title': 'Under the Shadow',
  'Audience Score (%)': '71',
  'Release Date': 'Streaming Jan 17, 2017'},
 {'Title': 'Steve',
  'Audience Score (%)': '67',
  'Release Date': 'Streaming Oct 3, 2025'},
 {'Title': 'The Thursday Murder Club',
  'Audience Score (%)': '53',
  'Release Date': 'Streaming Aug 28, 2025'},
 {'Title': 'Past Lives',
  'Audience Score (%)': '93',
  'Rele

Find the movies whose title is "Under the Shadow" or whose release date is "Streaming Feb 17, 2022".

In [5]:
# Load the scraped records into a DataFrame so rows can be filtered by column value.
df = pd.DataFrame(data)

# Keep rows that match either criterion: the exact title or the exact release-date string.
print(
    df.loc[
        df["Title"].eq("Under the Shadow")
        | df["Release Date"].eq("Streaming Feb 17, 2022")
    ]
)


               Title Audience Score (%)            Release Date
6   Under the Shadow                 71  Streaming Jan 17, 2017
26           Sundown                 63  Streaming Feb 17, 2022
