# New Section

In [3]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

def scrape_product_page(product_url, timeout=10):
    """Fetch one product detail page and extract descriptive fields from it.

    Args:
        product_url: Absolute URL of the product detail page.
        timeout: Seconds to wait for the server before giving up on the
            request.

    Returns:
        A dict with the keys "Description", "ASIN", "Product Description"
        and "Manufacturer". A field whose element is not present on the page
        is reported as "N/A". Returns None when the request fails or the
        response status is not 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    try:
        response = requests.get(product_url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Treat a network failure like a non-200 response: the caller
        # already handles None as "no additional info".
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    def text_or_na(element):
        # Missing elements are reported as "N/A" rather than raising.
        return element.text.strip() if element is not None else "N/A"

    # `string=` replaces the deprecated `text=` keyword; the pattern also
    # accepts header cells whose label is padded with whitespace.
    asin_header = soup.find("th", string=re.compile(r"^\s*ASIN\s*$"))
    # The value lives in the <td> following the header cell, which may itself
    # be absent.
    asin_cell = asin_header.find_next("td") if asin_header is not None else None

    return {
        "Description": text_or_na(soup.find("span", {"id": "productTitle"})),
        "ASIN": text_or_na(asin_cell),
        "Product Description": text_or_na(soup.find("div", {"id": "productDescription"})),
        "Manufacturer": text_or_na(soup.find("a", {"id": "bylineInfo"})),
    }

def _clean_text(element, strip_pattern=None):
    """Return the stripped text of a tag, or "N/A" when the tag is missing.

    When strip_pattern is given, every match of that regex is removed from
    the text.
    """
    if element is None:
        return "N/A"
    text = element.text.strip()
    if strip_pattern is not None:
        text = re.sub(strip_pattern, "", text)
    return text


def scrape_amazon_products(url, num_pages=30, max_products=200, timeout=10):
    """Scrape search-result pages and enrich each product with detail fields.

    Args:
        url: Search-results URL; "&page=<n>" is appended for each page.
        num_pages: Number of result pages to request, starting at page 1.
        max_products: Stop as soon as this many products were collected.
        timeout: Seconds to wait for each search-results request.

    Returns:
        A list of dicts, one per product, with the keys "Product URL",
        "Product Name", "Product Price", "Rating" and "Number of Reviews",
        plus whatever scrape_product_page returns for that product. Fields
        that cannot be found on the page are reported as "N/A".
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    all_products = []

    for page in range(1, num_pages + 1):
        page_url = url + "&page=" + str(page)
        try:
            response = requests.get(page_url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # Skip a page that cannot be fetched, as is done for non-200
            # responses, instead of losing everything collected so far.
            continue

        if response.status_code != 200:
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        products = soup.find_all("div", {"data-component-type": "s-search-result"})

        for product in products:
            link = product.find("a", {"class": "a-link-normal"})
            href = link.get("href") if link is not None else None
            if not href:
                # Without a link there is no product page to identify or
                # scrape, so the tile is skipped.
                continue
            product_url = "https://www.amazon.in" + href

            # Not every tile has a rating, and the text may hold an integer
            # ("4 out of 5") as well as a decimal ("4.3 out of 5").
            rating_tag = product.find("span", {"class": "a-icon-alt"})
            rating_match = (
                re.search(r"\d+(?:\.\d+)?", rating_tag.text)
                if rating_tag is not None else None
            )

            product_data = {
                "Product URL": product_url,
                "Product Name": _clean_text(product.find("span", {"class": "a-text-normal"})),
                # Keep only digits and the decimal point of the price.
                "Product Price": _clean_text(product.find("span", {"class": "a-offscreen"}), r"[^\d.]"),
                "Rating": rating_match.group(0) if rating_match else "N/A",
                # Keep only the digits of the review count.
                "Number of Reviews": _clean_text(product.find("span", {"class": "a-size-base"}), r"[^\d]"),
            }

            additional_info = scrape_product_page(product_url)
            if additional_info:
                product_data.update(additional_info)

            all_products.append(product_data)

            if len(all_products) >= max_products:
                return all_products

    return all_products

if __name__ == "__main__":
    # Search-results URL for "bags"; the scraper appends "&page=<n>" to it.
    base_url = "https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_"
    # Upper bounds for the run: pages requested and products collected.
    num_pages_to_scrape = 30
    max_products_to_scrape = 200

    scraped_products = scrape_amazon_products(
        base_url,
        num_pages=num_pages_to_scrape,
        max_products=max_products_to_scrape,
    )

    # One row per scraped product, written without the DataFrame index.
    output_file = "amazon_products.csv"
    df = pd.DataFrame(scraped_products)
    df.to_csv(output_file, index=False)

    print(f"Data exported to '{output_file}' file.")


  asin_element = soup.find("th", text="ASIN")


Data exported to 'amazon_products.csv' file.
