In [5]:
import nest_asyncio
import asyncio
import pandas as pd
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import csv

# Apply the nest_asyncio patch so asyncio calls can be nested inside an event
# loop that is already running (presumably the notebook kernel's loop, given
# this is a notebook cell).
nest_asyncio.apply()

async def scrape_details(url, page):
    """Load one book page and pull out its title, author and leading genres.

    Args:
        url: Address of the book page to open.
        page: Browser page object used to navigate and to read back the
            rendered HTML (``goto``, ``wait_for_selector``, ``content``).

    Returns:
        A ``(title, author, genres)`` tuple of strings. ``genres`` joins at
        most the first three genre labels with ", ". A field whose element is
        not found is "N/A". If any step raises an ``Exception`` the error is
        printed and all three fields are "N/A"; nothing is re-raised.
    """
    try:
        await page.goto(url)

        # Do not read the DOM until the title section has been rendered;
        # a timeout here raises and lands in the fallback below.
        await page.wait_for_selector(".BookPageTitleSection__title", timeout=10000)

        html = await page.content()
        soup = BeautifulSoup(html, 'html.parser')

        # Book title
        title_tag = soup.find('h1', class_='Text__title1')
        if title_tag:
            title = title_tag.get_text(strip=True)
        else:
            title = "N/A"

        # Author name (first contributor link on the page)
        author_tag = soup.find('span', class_='ContributorLink__name')
        if author_tag:
            author = author_tag.get_text(strip=True)
        else:
            author = "N/A"

        # Genres: keep only the first three labels
        genre_labels = soup.select('span.BookPageMetadataSection__genreButton span.Button__labelItem')
        top_genres = []
        for label in genre_labels[:3]:
            top_genres.append(label.get_text(strip=True))
        if top_genres:
            genre_text = ", ".join(top_genres)
        else:
            genre_text = "N/A"

        return title, author, genre_text

    except Exception as e:
        # Best-effort scraping: report the failure and hand back placeholders
        # so the caller can still record a row for this URL.
        print(f"Error scraping {url}: {e}")
        return "N/A", "N/A", "N/A"

async def main():
    """Scrape title, author and genres for every URL in ``books_with_ids.csv``.

    Reads the ``URL`` column of ``books_with_ids.csv`` (after dropping rows
    that are exact duplicates), signs in to Goodreads in a Chromium window,
    calls ``scrape_details`` for each URL in order, and writes one
    ``URL, Title, Author, Genres`` row per URL to ``scraped_details.csv``.
    A progress line is printed for each URL.

    The sign-in credentials are read from the ``GOODREADS_EMAIL`` and
    ``GOODREADS_PASSWORD`` environment variables so that no secret is stored
    in the source.

    Raises:
        RuntimeError: If either credential environment variable is unset or
            empty. This is checked first, before any file is opened or any
            browser is launched.
    """
    import os  # local import keeps this block self-contained

    # Fail fast on missing credentials: nothing is read, truncated or launched
    # until we know a login can be attempted.
    email = os.environ.get("GOODREADS_EMAIL")
    password = os.environ.get("GOODREADS_PASSWORD")
    if not email or not password:
        raise RuntimeError(
            "Set the GOODREADS_EMAIL and GOODREADS_PASSWORD environment "
            "variables before running the scraper."
        )

    # Read the URLs from the CSV file
    urls_df = pd.read_csv('books_with_ids.csv')
    urls_df = urls_df.drop_duplicates()  # Remove rows that are exact duplicates
    urls = urls_df['URL'].tolist()  # 'URL' must match the column header in the CSV

    # Prepare CSV file to save results
    with open('scraped_details.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['URL', 'Title', 'Author', 'Genres'])

        async with async_playwright() as p:
            # Launch a browser
            browser = await p.chromium.launch(headless=False)  # Use headless=True to run without GUI
            try:
                context = await browser.new_context()
                page = await context.new_page()

                # Log in to Goodreads; the same page (and its session) is then
                # reused for every book URL.
                await page.goto("https://www.goodreads.com/ap/signin?language=en_US&openid.assoc_handle=amzn_goodreads_web_na&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.goodreads.com%2Fap-handler%2Fsign-in&siteState=eyJyZXR1cm5fdXJsIjoiaHR0cHM6Ly93d3cuZ29vZHJlYWRzLmNvbS8ifQ%3D%3D")
                await page.wait_for_selector("input[name='email']", timeout=5000)
                await page.fill("input[name='email']", email)
                await page.fill("input[name='password']", password)
                await page.click("input[type='submit']")
                # The site header only appears once the sign-in has completed.
                await page.wait_for_selector(".siteHeader__topLevelLink", timeout=30000)

                # Iterate over each URL and scrape details
                for url in urls:
                    title, author, genres = await scrape_details(url, page)
                    writer.writerow([url, title, author, genres])
                    print(f"Scraped {url}: Title: {title}, Author: {author}, Genres: {genres}")
            finally:
                # Close the browser even if login or scraping raised.
                await browser.close()

# Run the scraper. A top-level `await` only works where the host already runs
# an event loop (e.g. an IPython/Jupyter cell); a plain script would need
# asyncio.run(main()) instead.
await main()


Scraped https://www.goodreads.com/book/show/617097.Pather_Panchali: Title: Pather Panchali: Song of the Road, Author: Bibhutibhushan Bandyopadhyay, Genres: Fiction, Classics, India
Scraped https://www.goodreads.com/book/show/7936528: Title: চাঁদের পাহাড়, Author: Bibhutibhushan Bandyopadhyay, Genres: Adventure, Fiction, Classics
Scraped https://www.goodreads.com/book/show/2426848._: Title: ব্যোমকেশ সমগ্র, Author: Sharadindu Bandyopadhyay, Genres: Detective, Mystery, Fiction
Scraped https://www.goodreads.com/book/show/244524.The_Complete_Adventures_of_Feluda_Vol_1: Title: The Complete Adventures of Feluda, Vol. 1, Author: Satyajit Ray, Genres: Mystery, Fiction, Detective
Scraped https://www.goodreads.com/book/show/1268536.Sesher_Kobita_the_Last_Poem: Title: Sesher Kobita, the Last Poem, Author: Rabindranath Tagore, Genres: Fiction, Romance, Classics
Scraped https://www.goodreads.com/book/show/670686._: Title: শঙ্কু সমগ্র, Author: Satyajit Ray, Genres: Science Fiction, Adventure, Fiction