In [57]:
# Import necessary libraries
import requests                    # For sending HTTP requests to web pages
from bs4 import BeautifulSoup      # For parsing HTML content
import pandas as pd                # For storing data in tabular format (DataFrame)
import random                      # For selecting random user-agents to avoid being blocked
import time                        # (Optional) Can be used to slow down requests to avoid detection

In [31]:
# Pool of browser user-agent strings to pick from when building request headers.
# All entries share the same Chrome 91 signature and differ only in the
# platform token, so the list is generated from the tokens.
_platform_tokens = (
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
)
user_agents = [
    f"Mozilla/5.0 ({token}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    for token in _platform_tokens
]

In [33]:
# Build the HTTP headers sent with every request.
# The user-agent is drawn at random once, when this cell runs, and the same
# headers dict is then reused by every request made further down.
headers = {}
headers["User-Agent"] = random.choice(user_agents)
headers["Accept-Language"] = "en-Us,en;q=0.9"

In [43]:
# Lists to store scraped data. They are filled in lockstep (one entry per
# product) so they can later be used as DataFrame columns of equal length.
product_name = []
product_price = []
product_rating = []

REQUEST_TIMEOUT = 15   # seconds; without a timeout requests.get() can block forever
REQUEST_DELAY = 1.0    # seconds to pause between product pages; set to 0 to disable


def _fetch_soup(url):
    """Download ``url`` and return it parsed as BeautifulSoup, or ``None`` on failure.

    Uses the notebook-level ``headers`` dict. Network errors and non-2xx
    responses are reported with a short message and turned into ``None`` so
    that one bad page does not abort the whole scrape.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request failed for {url[:100]}: {exc}")
        return None
    if not response.ok:
        print(f"Skipping {url[:100]}: HTTP {response.status_code}")
        return None
    return BeautifulSoup(response.content, "html.parser")


# Loop through 3 pages of Amazon results (you can increase the range)
for page in range(1, 4):
    url = f"https://www.amazon.in/s?i=kitchen&bbn=81107433031&rh=n%3A81107433031%2Cp_85%3A10440599031&page={page}&_encoding=UTF8&content-id=amzn1.sym.58c90a12-100b-4a2f-8e15-7c06f1abe2be&pd_rd_r=eb705f4e-d34b-456d-a496-b52f6602d46b&pd_rd_w=hwFSy&pd_rd_wg=MVPlH&qid=1745483311&xpid=thnXwLcmHk4-q&ref=sr_pg_2"

    # Fetch the search-result page; skip this page entirely if it could not be loaded
    soup = _fetch_soup(url)
    if soup is None:
        continue

    # Find all product links using the class names specific to Amazon layout
    links = soup.find_all("a", attrs={"class": "a-link-normal s-line-clamp-4 s-link-style a-text-normal"})

    # Visit each product's individual page
    for link in links:
        href = link.get("href")
        if not href:
            # Anchor without a target: nothing to visit (concatenating None would raise)
            continue
        # Relative links need the site prefix; absolute ones are used as-is
        product_link = href if href.startswith("http") else "https://www.amazon.in" + href

        # Fetch the product detail page
        product_soup = _fetch_soup(product_link)

        if product_soup is None:
            # Keep a placeholder row so the product is still counted
            name = price = rating = None
        else:
            # Extract product title, price, and rating (if available)
            name = product_soup.find("span", attrs={"id": "productTitle"})
            price = product_soup.find("span", attrs={"class": "a-price-whole"})
            rating = product_soup.find("span", attrs={"class": "a-icon-alt"})

        # Clean and append the data to the respective lists ("NA" marks a missing field)
        product_name.append(name.text.strip() if name else "NA")
        product_price.append(price.text.strip().rstrip('.') if price else "NA")
        product_rating.append(rating.text.strip() if rating else "NA")

        # Throttle so the product pages are not requested back-to-back
        time.sleep(REQUEST_DELAY)

# Print summary of collected data
print(f"Total number of records \n product name: {len(product_name)} \n product price: {len(product_price)} \n product rating: {len(product_rating)}")

Total number of records 
 product name: 82 
 product price: 82 
 product rating: 82


In [52]:
# Assemble the three scraped lists into a single table, one column per field
column_names = ("product_name", "price", "ratings")
column_values = (product_name, product_price, product_rating)
data = dict(zip(column_names, column_values))
df = pd.DataFrame(data=data)

# Write the table to disk as CSV, leaving out the row index
df.to_csv(path_or_buf="amazon_products_multiple_page.csv", index=False)

print("Scraping completed. Data saved to 'amazon_products_multiple_page.csv'")

Scraping completed. Data saved to 'amazon_products_multiple_page.csv'


In [53]:
# Preview the first five rows of the scraped table as a quick sanity check
df.head(n=5)

Unnamed: 0,product_name,price,ratings
0,"Samsung 183 L, 4 Star, Digital Inverter, Direc...",16390,4.3 out of 5 stars
1,"Haier 165 L, 1 Star, Direct-Cool Single Door R...",10990,3.9 out of 5 stars
2,Whirlpool 184 L 2 Star Direct-Cool Single Door...,12190,4.0 out of 5 stars
3,"Haier 190 L, 4 Star, Direct Cool Single Door R...",14990,4.1 out of 5 stars
4,Godrej 180 L 2 Star Advanced Capillary Technol...,12390,4.0 out of 5 stars
