# In [3]:
import requests
from bs4 import BeautifulSoup
import csv

def _element_text(element, default):
    """Return the text of a parsed element, or ``default`` if it was not found."""
    return element.text if element is not None else default


def scrape_product_listings(url, num_pages, timeout=10):
    """Scrape product summaries from paginated search-result pages.

    Pages 1..num_pages are requested by appending ``&page=<n>`` to ``url``,
    and one dict is collected for every search result found on each page.

    Args:
        url: Search URL. It must already contain a query string, because the
            page number is appended with ``&``.
        num_pages: Number of result pages to request; values below 1 request
            nothing and yield an empty list.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of dicts with the keys 'Product URL', 'Product Name',
        'Product Price', 'Rating' and 'Number of Reviews'. A field whose
        element is missing from the page gets a placeholder string. Results
        without a product link are skipped, since they cannot be followed up.

    Raises:
        Network failures and timeouts raised by ``requests.get`` propagate
        to the caller.
    """
    from urllib.parse import urljoin

    base_url = 'https://www.amazon.in'
    all_products = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64) AppleWebkit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
    }

    for page in range(1, num_pages + 1):
        page_url = f"{url}&page={page}"
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(page_url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        products = soup.find_all('div', {'data-component-type': 's-search-result'})

        for product in products:
            # find() returns None when the markup differs, so every lookup is
            # guarded instead of chaining .get()/.text directly on the result.
            link = product.find('a', class_='a-link-normal s-no-outline')
            href = link.get('href') if link is not None else None
            if not href:
                continue

            name = product.find('span', class_='a-size-medium a-color-base a-text-normal')
            price = product.find('span', class_='a-offscreen')
            rating = product.find('span', class_='a-icon-alt')
            # NOTE(review): this takes the first span with class 'a-size-base';
            # confirm that it really is the review count on the target pages.
            num_reviews = product.find('span', class_='a-size-base')

            all_products.append({
                # urljoin leaves absolute hrefs intact and resolves relative ones.
                'Product URL': urljoin(base_url, href),
                'Product Name': _element_text(name, 'Name not available'),
                'Product Price': _element_text(price, 'Price not available'),
                'Rating': _element_text(rating, 'Not available'),
                'Number of Reviews': _element_text(num_reviews, 'Not available'),
            })

    return all_products


def _stripped_text(element, default):
    """Return the whitespace-stripped text of an element, or ``default`` if absent."""
    return element.text.strip() if element is not None else default


def scrape_product_details(product_urls, headers=None, timeout=10):
    """Fetch each product page and extract its detail fields.

    Args:
        product_urls: Iterable of absolute product page URLs.
        headers: Optional HTTP headers for every request. When omitted, a
            browser-like User-Agent is sent.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list with exactly one dict per input URL, in input order, holding
        the keys 'Description', 'ASIN', 'Product Description' and
        'Manufacturer'. A field that cannot be found on the page gets a
        "... not available" placeholder string.

    Raises:
        Network failures and timeouts raised by ``requests.get`` propagate
        to the caller.
    """
    if headers is None:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64) AppleWebkit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
        }

    all_product_details = []

    for url in product_urls:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Match the header cell on its stripped text so that surrounding
        # whitespace in the markup does not hide the ASIN row.
        asin_header = next(
            (th for th in soup.find_all('th') if th.get_text(strip=True) == 'ASIN'),
            None,
        )
        # The header may exist without a following <td>; guard that as well.
        asin_cell = asin_header.find_next_sibling('td') if asin_header is not None else None

        all_product_details.append({
            'Description': _stripped_text(
                soup.find('div', id='productDescription'),
                'Description not available',
            ),
            'ASIN': _stripped_text(asin_cell, 'ASIN not available'),
            'Product Description': _stripped_text(
                soup.find('div', id='feature-bullets'),
                'Product description not available',
            ),
            'Manufacturer': _stripped_text(
                soup.find('a', id='bylineInfo'),
                'Manufacturer not available',
            ),
        })

    return all_product_details


# Scrape product listings
url = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_'
num_pages = 20
product_listings = scrape_product_listings(url, num_pages)

# Extract product URLs
product_urls = [listing['Product URL'] for listing in product_listings]

# Scrape product details
max_detail_pages = 200  # Limiting to 200 URLs as requested
product_details = scrape_product_details(product_urls[:max_detail_pages])

# Combine product listings and details. Details exist only for the first
# max_detail_pages listings, so pair the two lists with zip (which stops at
# the shorter one) instead of indexing details by listing position, which
# raised IndexError whenever more listings than details were scraped.
all_data = [
    {**listing, **details}
    for listing, details in zip(product_listings, product_details)
]

# Export data to CSV file
csv_file = 'amazon_products.csv'
fieldnames = ['Product URL', 'Product Name', 'Product Price', 'Rating', 'Number of Reviews',
              'Description', 'ASIN', 'Product Description', 'Manufacturer']
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_data)

print(f"Scraping completed and data exported to '{csv_file}'.")


# Output: Scraping completed and data exported to 'amazon_products.csv'.
