# Amazon web scraping using Python.

# In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Set the base URL for the product listings
base_url = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2BC283&ref=sr_pg_'

# Define the number of pages to scrape
num_pages = 20

# Initialize empty lists to store scraped data
product_urls = []
product_names = []
product_prices = []
ratings = []
num_reviews = []
descriptions = []
asins = []
product_descriptions = []
manufacturers = []

# Scrape data from each page
for page in range(1, num_pages+1):
    url = base_url + str(page)
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract data from the product listing page
    listings = soup.find_all('div', {'data-component-type': 's-search-result'})

    for listing in listings:
        # Extract product URL, name, price, rating, and number of reviews
        product_url = 'https://www.amazon.in' + listing.find('a', {'class': 'a-link-normal'})['href']
        product_urls.append(product_url)
        product_name = listing.find('span', {'class': 'a-size-medium'}).text.strip()
        product_names.append(product_name)
        product_price = listing.find('span', {'class': 'a-price-whole'}).text.strip()
        product_prices.append(product_price)
        rating = listing.find('span', {'class': 'a-icon-alt'}).text.strip().split()[0]
        ratings.append(rating)
        num_review = listing.find('span', {'class': 'a-size-base'}).text.strip().replace(',', '')
        num_reviews.append(num_review)

        # Hit the product URL and extract additional data
        product_response = requests.get(product_url)
        product_soup = BeautifulSoup(product_response.content, 'html.parser')

        description_element = product_soup.find('div', {'id': 'productDescription'})
        description = description_element.text.strip() if description_element else ''
        descriptions.append(description)

        asin_element = product_soup.find('th', text='ASIN')
        asin = asin_element.find_next('td').text.strip() if asin_element else ''
        asins.append(asin)

        product_description_element = product_soup.find('div', {'id': 'productDescription'})
        product_description = product_description_element.text.strip() if product_description_element else ''
        product_descriptions.append(product_description)

        manufacturer_element = product_soup.find('th', text='Manufacturer')
        manufacturer = manufacturer_element.find_next('td').text.strip() if manufacturer_element else ''
        manufacturers.append(manufacturer)

# Create a pandas DataFrame to store the scraped data
data = {
    'Product URL': product_urls,
    'Product Name': product_names,
    'Product Price': product_prices,
    'Rating': ratings,
    'Number of Reviews': num_reviews,
    'Description': descriptions,
    'ASIN': asins,
    'Product Description': product_descriptions,
    'Manufacturer': manufacturers
}
df = pd.DataFrame(data)

# Export the data to a CSV file
df.to_csv('amazon_products.csv', index=False)
