# Amazon.in Search Results Scraper

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# Patterns are compiled once instead of on every product card.
_NON_PRICE_CHARS = re.compile(r"[^\d.]")
_NON_DIGITS = re.compile(r"[^\d]")
_RATING_PATTERN = re.compile(r"\d\.\d")


def _first_text(node, tag, css_class):
    """Return the stripped text of the first matching descendant, or None if absent."""
    match = node.find(tag, {"class": css_class})
    return match.text.strip() if match is not None else None


def _parse_product(product):
    """Build one result row from a search-result container.

    Returns None when the container has no usable link or title, since such
    a row cannot identify a product.
    """
    link = product.find("a", {"class": "a-link-normal"})
    href = link.get("href") if link is not None else None
    name = _first_text(product, "span", "a-text-normal")
    if not href or name is None:
        return None

    # Keep only digits and the decimal point (drops currency symbol and commas).
    price_text = _first_text(product, "span", "a-offscreen")
    price = _NON_PRICE_CHARS.sub("", price_text) if price_text is not None else "N/A"

    # The rating text is expected to contain a "d.d" number; fall back to
    # "N/A" when the element is missing or carries no such number.
    rating_text = _first_text(product, "span", "a-icon-alt")
    rating_match = _RATING_PATTERN.search(rating_text) if rating_text is not None else None
    rating = rating_match.group() if rating_match else "N/A"

    # NOTE(review): this takes the first span with class "a-size-base"; confirm
    # against live markup that it is always the review count.
    reviews_text = _first_text(product, "span", "a-size-base")
    num_reviews = _NON_DIGITS.sub("", reviews_text) if reviews_text is not None else "N/A"

    return {
        "Product URL": "https://www.amazon.in" + href,
        "Product Name": name,
        "Product Price": price,
        "Rating": rating,
        "Number of Reviews": num_reviews,
    }


def scrape_amazon_products(url, num_pages=20, timeout=30):
    """Scrape product listings from paginated Amazon search results.

    Pages 1..num_pages are requested by appending "&page=<n>" to ``url``.
    Pages that do not answer with HTTP 200 are skipped. Product cards that
    lack a link or a title are skipped; a missing price, rating or review
    count is reported as "N/A" instead of aborting the whole run.

    Args:
        url: Search URL to which the "&page=<n>" query parameter is appended.
        num_pages: Number of result pages to request, starting at page 1.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of dicts with the keys "Product URL", "Product Name",
        "Product Price", "Rating" and "Number of Reviews" (all strings).

    Raises:
        requests.RequestException: If a request fails at the network level
            (connection error, timeout, ...).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    all_products = []

    for page in range(1, num_pages + 1):
        page_url = f"{url}&page={page}"
        # Without a timeout a stalled connection would block forever.
        response = requests.get(page_url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        for product in soup.find_all("div", {"data-component-type": "s-search-result"}):
            product_data = _parse_product(product)
            if product_data is not None:
                all_products.append(product_data)

    return all_products

if __name__ == "__main__":
    # Search-results URL for "bags" on amazon.in; the scraper appends
    # "&page=<n>" to it for every page it requests.
    base_url = "https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_"
    num_pages_to_scrape = 20

    # Collect the product dictionaries, then tabulate them one row per product.
    scraped_products = scrape_amazon_products(url=base_url, num_pages=num_pages_to_scrape)
    df = pd.DataFrame(data=scraped_products)

    print(df)


                                          Product URL  \
0   https://www.amazon.in/American-Tourister-AMT-S...   
1   https://www.amazon.in/Genie-compartments-Light...   
2   https://www.amazon.in/AirCase-C34-Laptop-Backp...   
3   https://www.amazon.in/Martucci-Waterproof-Back...   
4   https://www.amazon.in/Safari-Spartan-Water-Res...   
5   https://www.amazon.in/Wildcraft-Black-Backpack...   
6   https://www.amazon.in/Genie-Clara-Backpack-Wom...   
7   https://www.amazon.in/Shalimar-Capacity-Travel...   
8   https://www.amazon.in/Wildcraft-Ltrs-orange-Ru...   
9   https://www.amazon.in/Wildcraft-Packs18-Laptop...   
10  https://www.amazon.in/Number-Backpack-Compartm...   
11  https://www.amazon.in/Trajectory-Elegant-15-6-...   
12  https://www.amazon.in/Veneer-Backpack-Fashion-...   
13  https://www.amazon.in/Tinytot-School-Backpack-...   
14  https://www.amazon.in/Casual-Backpack-Charging...   
15  https://www.amazon.in/Genie-Ltrs-School-Backpa...   
16  https://www.amazon.in/gp/be