In [13]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

# Scrape the turtle family names (and the link to each family's detail page)
# from the iframe embedded in the page below, and save them to turtle_data.csv.

# The URL of the website that contains a frame or iframe to be scraped.
url = "https://www.scrapethissite.com/pages/frames/"

# Seconds to wait for each HTTP response. requests applies no timeout by
# default, so without this a stalled server would hang the script forever.
REQUEST_TIMEOUT = 10

# Value of the class attribute on each turtle's detail link.
BIO_LINK_CLASS = 'btn btn-default btn-xs'

# Fetch the outer page; raise_for_status() turns 4xx/5xx responses into errors.
response = requests.get(url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()

# Parsing the HTML content of the response using BeautifulSoup.
soup = BeautifulSoup(response.text, 'html.parser')

# Find the <iframe> element and its 'src' attribute (the iframe's source URL).
iframe = soup.find('iframe')
iframe_src = iframe.get('src') if iframe else None

# Require a non-empty src: urljoin(url, None) would silently return the outer
# page URL, and the script would then re-scrape the wrong document.
if iframe_src:
    print(f"Iframe URL found: {iframe_src}")

    # The src may be relative, so resolve it against the outer page URL.
    iframe_url = urljoin(url, iframe_src)

    # Fetch and parse the iframe's own document.
    iframe_response = requests.get(iframe_url, timeout=REQUEST_TIMEOUT)
    iframe_response.raise_for_status()
    iframe_soup = BeautifulSoup(iframe_response.text, 'html.parser')

    # Each turtle family is announced by an <h3 class="family-name"> heading.
    turtles = iframe_soup.find_all('h3', class_='family-name')

    # Open a CSV file for writing.
    with open('turtle_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['name', 'bio'])
        writer.writeheader()

        for turtle in turtles:
            # NOTE(review): assumes the detail link sits inside the same
            # parent element as the heading -- confirm against the page
            # markup. Searching only the parent keeps a heading from being
            # paired with another family's link when its own is missing.
            bio_link = turtle.parent.find('a', class_=BIO_LINK_CLASS)
            bio_href = bio_link.get('href') if bio_link else None

            writer.writerow({
                'name': turtle.text.strip(),
                # Store an absolute URL; leave the cell empty if no link exists.
                'bio': urljoin(iframe_url, bio_href) if bio_href else '',
            })

    print("Data saved as turtle_data.csv")
else:
    # No iframe (or an iframe without a usable src) means nothing to scrape.
    print("Iframe not found!")


Iframe URL found: /pages/frames/?frame=i
Data saved as turtle_data.csv
