In [6]:
import asyncio
import csv
import os
from urllib.parse import urljoin

import nest_asyncio
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright

# Patch asyncio so an event loop that is already running can be re-entered.
# NOTE(review): this looks like a notebook cell, where the kernel already owns
# a running loop — confirm the patch is still needed if this becomes a script.
nest_asyncio.apply()

async def scrape_friend_urls(url, page):
    """Load one friend-list page and return the profile URLs found on it.

    Args:
        url: Address of the friend-list page to open.
        page: Browser page exposing awaitable ``goto``, ``wait_for_selector``
            and ``content`` methods (``main`` passes a Playwright page).

    Returns:
        A list of absolute profile URLs in document order. The list is empty
        when no link matches or when loading/parsing fails; failures are
        printed instead of raised so one bad page does not abort the crawl.
    """
    site_root = "https://www.goodreads.com"

    try:
        # Navigate to the friend list and wait until the table is rendered.
        await page.goto(url)
        await page.wait_for_selector("#friendTable", timeout=10000)

        # Parse a snapshot of the rendered DOM.
        content = await page.content()
        soup = BeautifulSoup(content, 'html.parser')

        anchors = soup.select('table#friendTable td[width="30%"] a[rel="acquaintance"]')

        friend_urls = []
        for anchor in anchors:
            href = anchor.get('href')
            if not href:
                # An anchor without a target must not discard the whole page.
                continue
            # urljoin copes with both relative and already-absolute hrefs,
            # which plain string concatenation does not.
            friend_urls.append(urljoin(site_root, href))

        return friend_urls

    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller continue.
        print(f"Error scraping {url}: {e}")
        return []

async def main(
    email=None,
    password=None,
    *,
    base_url="https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=",
    total_pages=30,
    output_path='friend_urls.csv',
):
    """Log in to Goodreads and save every friend profile URL to a CSV file.

    Args:
        email: Account email. Falls back to the ``GOODREADS_EMAIL``
            environment variable when omitted.
        password: Account password. Falls back to the ``GOODREADS_PASSWORD``
            environment variable when omitted.
        base_url: Friend-list URL ending in ``page=``; the page number is
            appended to it.
        total_pages: Number of friend-list pages to visit, starting at 1.
        output_path: Destination CSV file; it is overwritten.

    Raises:
        RuntimeError: If no email or password is available. Raised before
            the output file or the browser is opened.
    """
    # Credentials come from the caller or the environment, never from source.
    email = email or os.environ.get("GOODREADS_EMAIL")
    password = password or os.environ.get("GOODREADS_PASSWORD")
    if not email or not password:
        raise RuntimeError(
            "Goodreads credentials missing: pass email/password or set "
            "GOODREADS_EMAIL and GOODREADS_PASSWORD"
        )

    # Prepare CSV file to save the friend list
    with open(output_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Friend URL'])  # Write header

        async with async_playwright() as p:
            # Launch a browser
            browser = await p.chromium.launch(headless=False)  # Use headless=True for headless mode
            try:
                context = await browser.new_context()
                page = await context.new_page()

                # Log in to Goodreads
                await page.goto("https://www.goodreads.com/ap/signin?language=en_US&openid.assoc_handle=amzn_goodreads_web_na&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.goodreads.com%2Fap-handler%2Fsign-in&siteState=eyJyZXR1cm5fdXJsIjoiaHR0cHM6Ly93d3cuZ29vZHJlYWRzLmNvbS8ifQ%3D%3D")
                await page.wait_for_selector("input[name='email']", timeout=5000)
                await page.fill("input[name='email']", email)
                await page.fill("input[name='password']", password)
                await page.click("input[type='submit']")
                # The site header only appears once the login has gone through.
                await page.wait_for_selector(".siteHeader__topLevelLink", timeout=30000)

                # Iterate over all pages of the friend list
                for page_number in range(1, total_pages + 1):
                    current_url = f"{base_url}{page_number}&skip_mutual_friends=false"
                    print(f"Scraping: {current_url}")

                    # Scrape friend URLs from the current page
                    friend_urls = await scrape_friend_urls(current_url, page)

                    # Write the URLs to the CSV file
                    writer.writerows([friend_url] for friend_url in friend_urls)
                    print(f"Scraped {len(friend_urls)} friends from page {page_number}")
            finally:
                # Close the browser even when login or scraping fails.
                await browser.close()

# Run the async function. A bare top-level ``await`` is only accepted by
# IPython/Jupyter; ``asyncio.run`` also works when the file is executed as a
# plain script, and inside a notebook because nest_asyncio.apply() has
# patched asyncio to allow nested runs.
if __name__ == "__main__":
    asyncio.run(main())


Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=1&skip_mutual_friends=false
Scraped 30 friends from page 1
Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=2&skip_mutual_friends=false
Scraped 30 friends from page 2
Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=3&skip_mutual_friends=false
Scraped 30 friends from page 3
Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=4&skip_mutual_friends=false
Scraped 30 friends from page 4
Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=5&skip_mutual_friends=false
Scraped 30 friends from page 5
Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=6&skip_mutual_friends=false
Scraped 30 friends from page 6
Scraping: https://www.goodreads.com/friend/user/26278904-monika-ghosh?page=7&skip_mutual_friends=false
Scraped 30 friends from page 7
Scraping: https://www.goodreads.com/friend/user/26278904-monik