In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to fetch the list URLs from a single page of the "Recently Active Lists"
def fetch_list_urls(url):
    """Collect the name and absolute URL of every list linked from one page.

    Args:
        url: Address of a single "Recently Active Lists" page.

    Returns:
        A list of ``(list_name, list_url)`` tuples, one for each
        ``<a class="listTitle">`` element that carries an ``href``.
        An empty list is returned when the request raises a network error
        or the server answers with a status code other than 200.
    """
    # Local import keeps this function self-contained; urljoin copes with
    # relative, slash-less and already-absolute hrefs alike.
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Without a timeout a stalled connection would block forever; a network
    # error is reported and treated like any other failed page so that the
    # caller keeps whatever it has already collected.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the webpage: {url} ({exc})")
        return []

    # Anything other than 200 is treated as a failed page
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage: {url}")
        return []

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.content, 'html.parser')

    # href=True skips anchors that have no link target instead of raising
    # KeyError on them.
    list_urls = []
    for list_container in soup.find_all('a', class_='listTitle', href=True):
        list_name = list_container.get_text(strip=True)
        list_url = urljoin('https://www.goodreads.com', list_container['href'])
        list_urls.append((list_name, list_url))

    return list_urls

# Function to fetch book titles from a given list URL
def fetch_books_from_list(list_name, list_url):
    """Return the book titles shown on a single list page.

    Args:
        list_name: Display name of the list. It is not used inside this
            function; it is kept so existing callers keep working.
        list_url: Absolute URL of the list page to download.

    Returns:
        A list with the stripped text of every ``<a class="bookTitle">``
        element on the page. An empty list is returned when the request
        raises a network error or the server answers with a status code
        other than 200.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Without a timeout a stalled connection would block forever; a network
    # error is reported and the list is skipped rather than aborting the run.
    try:
        response = requests.get(list_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the list: {list_url} ({exc})")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the list: {list_url}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # One entry per book-title anchor, in document order
    return [
        book.get_text(strip=True)
        for book in soup.find_all('a', class_='bookTitle')
    ]

# Function to scrape list URLs and then scrape books from those lists
def scrape_lists(base_url, total_pages, delay=2):
    """Walk the listing pages and gather the books of every list found.

    Args:
        base_url: URL prefix to which the page number is appended.
        total_pages: Number of listing pages to visit, starting at page 1.
            A value of zero or less visits nothing.
        delay: Seconds to wait between two consecutive listing pages.
            Defaults to 2.

    Returns:
        A list of ``(list_name, book_title)`` tuples in the order in which
        they were scraped.
    """
    all_books = []

    for page_num in range(1, total_pages + 1):
        url = f"{base_url}{page_num}"
        print(f"Scraping page {page_num}: {url}")

        # Fetch list URLs from the current page
        list_urls = fetch_list_urls(url)

        if not list_urls:
            print(f"No lists found on page {page_num}")

        # An empty result simply makes this loop a no-op
        for list_name, list_url in list_urls:
            print(f"Fetching books from list: {list_name} ({list_url})")

            # Fetch books from the list
            books = fetch_books_from_list(list_name, list_url)
            all_books.extend((list_name, book) for book in books)

        # Pause to avoid overwhelming the server; there is nothing left to
        # throttle after the final page, so skip the wait there.
        if page_num < total_pages:
            time.sleep(delay)

    return all_books

# Function to save titles to a CSV file using pandas
def save_to_csv(book_data, filename):
    """Write (list name, book title) pairs to a CSV file.

    Args:
        book_data: Sequence of two-item rows; the first item goes into the
            'List Name' column and the second into 'Book Title'.
        filename: Path of the CSV file to write.
    """
    header = ['List Name', 'Book Title']

    # Build the table and write it in one go, leaving out the row index
    pd.DataFrame(data=book_data, columns=header).to_csv(
        path_or_buf=filename,
        index=False,
    )

# Base URL of the Goodreads "Recently Active Lists" with pagination
base_url = 'https://www.goodreads.com/list/recently_active_lists?page='

# Number of listing pages to walk (pages 1 through 10)
total_pages = 10

# Collect (list name, book title) pairs from those pages
book_data = scrape_lists(base_url, total_pages)

# Only write the CSV when at least one book was collected
if not book_data:
    print("No books were scraped.")
else:
    save_to_csv(book_data, 'goodreads_recently_active_books.csv')
    print(f'Successfully saved {len(book_data)} books to CSV file.')


Scraping page 1: https://www.goodreads.com/list/recently_active_lists?page=1
Fetching books from list: Good Novels With Regretful Heroes (https://www.goodreads.com/list/show/121977.Good_Novels_With_Regretful_Heroes)
Fetching books from list: 100 Books to Read in a Lifetime: Readers' Picks (https://www.goodreads.com/list/show/69635.100_Books_to_Read_in_a_Lifetime_Readers_Picks)
Fetching books from list: Christmas Wish List (https://www.goodreads.com/list/show/28467.Christmas_Wish_List)
Fetching books from list: MUST READS!!! (https://www.goodreads.com/list/show/19106.MUST_READS_)
Fetching books from list: Christmas Stocking Stuffers (https://www.goodreads.com/list/show/14962.Christmas_Stocking_Stuffers)
Fetching books from list: Books That Should Be Made Into Movies (https://www.goodreads.com/list/show/1043.Books_That_Should_Be_Made_Into_Movies)
Fetching books from list: BookTok Romance Recommendations (https://www.goodreads.com/list/show/168061.BookTok_Romance_Recommendations)
Fetching