In [1]:
import requests
from bs4 import BeautifulSoup
import csv

# Function to scrape product data from an Amazon product listing page
def scrape_amazon_products(url, timeout=30):
    """Scrape one Amazon search-results page into a list of product rows.

    Args:
        url: Address of the search-results page to download.
        timeout: Seconds to wait for the server before giving up on the request.

    Returns:
        A list of ``[product_url, product_name, product_price, rating,
        num_reviews]`` lists, one per search result that carries a link.
        A field whose element is not found is the string ``'N/A'``.
        Returns ``None`` when the request fails or the status is not 200.
    """
    # Function-scope import so this definition does not depend on edits to the import lines.
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Report a network failure the same way as a non-200 reply so that a
        # single bad page does not abort a multi-page run.
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    products = soup.find_all('div', {'data-component-type': 's-search-result'})

    def text_or_na(elem):
        # Stripped text of a found element, or the 'N/A' placeholder.
        return elem.text.strip() if elem is not None else 'N/A'

    product_data = []
    for product in products:
        link = product.find('a', {'class': 'a-link-normal'})
        href = link.get('href') if link is not None else None
        if not href:
            # Without a link there is no product URL to record; skipping keeps
            # the URL column usable by later steps instead of raising here.
            continue

        # urljoin gives the same result as concatenation for "/path" hrefs and
        # also copes with hrefs that are already absolute.
        product_url = urljoin('https://www.amazon.in', href)

        product_data.append([
            product_url,
            text_or_na(product.find('span', {'class': 'a-text-normal'})),
            text_or_na(product.find('span', {'class': 'a-price-whole'})),
            text_or_na(product.find('span', {'class': 'a-icon-alt'})),
            # NOTE(review): takes the first 'a-size-base' span in the result;
            # confirm against live markup that this is the review count.
            text_or_na(product.find('span', {'class': 'a-size-base'})),
        ])

    return product_data

# Function to scrape multiple pages
def scrape_multiple_pages(base_url, num_pages):
    """Collect product rows from pages 1 through ``num_pages`` of a listing.

    Args:
        base_url: Search URL to which a ``page`` query parameter is appended.
        num_pages: Number of consecutive pages to request, starting at page 1.

    Returns:
        A flat list of the rows returned by ``scrape_amazon_products`` for
        every page that produced data. Pages that return ``None`` or an empty
        list are skipped.
    """
    # Pick the joiner so the page parameter stays well-formed whether or not
    # base_url already carries a query string.
    if '?' not in base_url:
        joiner = '?'
    elif base_url.endswith(('?', '&')):
        joiner = ''
    else:
        joiner = '&'

    all_product_data = []
    for page in range(1, num_pages + 1):
        page_rows = scrape_amazon_products(f'{base_url}{joiner}page={page}')
        if page_rows:
            all_product_data.extend(page_rows)

    return all_product_data

# Main function to scrape and export data
def main():
    """Scrape the bag search listing and save the rows to a CSV file.

    Requests 20 result pages through ``scrape_multiple_pages``. When any rows
    come back, writes a header row followed by the data to
    ``amazon_product_data.csv`` in the current working directory. No file is
    written when nothing was scraped.
    """
    search_url = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_1'
    page_count = 20
    header = ['Product URL', 'Product Name', 'Product Price', 'Rating', 'Number of Reviews']

    rows = scrape_multiple_pages(search_url, page_count)
    if not rows:
        return

    # Export the data to a CSV file: header first, then one line per product.
    with open('amazon_product_data.csv', 'w', newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerows([header, *rows])
    
# Entry point: start the listing scrape when this code runs as the main
# program rather than being imported as a module.
if __name__ == '__main__':
    main()


In [3]:
import requests
from bs4 import BeautifulSoup
import csv

# Function to scrape additional information from a product URL
def scrape_product_info(product_url, timeout=30):
    """Fetch a product page and extract four descriptive fields.

    Args:
        product_url: Address of the product detail page.
        timeout: Seconds to wait for the server before giving up on the request.

    Returns:
        ``[description, asin, product_description, manufacturer]`` as stripped
        strings. A field whose element is not found is ``'N/A'``. All four are
        ``'N/A'`` when the request fails or the status is not 200.
    """
    headers = {
        # NOTE(review): placeholder value; replace it with a real browser
        # User-Agent string before running.
        "User-Agent": "Your User Agent Here"
    }

    try:
        response = requests.get(product_url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Same fallback as a non-200 reply, so one unreachable product does
        # not abort a loop over many URLs.
        return ['N/A'] * 4

    if response.status_code != 200:
        return ['N/A'] * 4

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or_na(elem):
        # Stripped text of a found element, or the 'N/A' placeholder.
        return elem.text.strip() if elem is not None else 'N/A'

    def is_asin_label(cell_text):
        # Receives the header cell's string, which can be None; tolerate
        # whitespace padding around the label.
        return cell_text is not None and cell_text.strip() == 'ASIN'

    # `string=` is the current name of the deprecated `text=` filter argument.
    asin_header = soup.find('th', string=is_asin_label)
    # Guard both lookups: the header may be absent, and so may a following <td>.
    asin_cell = asin_header.find_next('td') if asin_header is not None else None

    # Return the gathered information (Description, ASIN, Product Description, Manufacturer)
    return [
        text_or_na(soup.find('div', {'id': 'productDescription'})),
        text_or_na(asin_cell),
        text_or_na(soup.find('div', {'id': 'feature-bullets'})),
        text_or_na(soup.find('a', {'id': 'bylineInfo'})),
    ]

# Main function to scrape and export additional data
def main():
    """Look up extra details for every product URL saved by the listing scrape.

    Reads the first column of ``amazon_product_data.csv`` (after its header
    row), calls ``scrape_product_info`` for each URL, and writes one row per
    product to ``amazon_product_info.csv``. No output file is written when the
    input contains no URLs.
    """
    # Read the product URLs obtained from Part 1
    with open('amazon_product_data.csv', 'r', newline='', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(csv_reader, None)
        # Blank lines parse as empty rows and blank URLs cannot be fetched, so
        # both are skipped.
        product_urls = [row[0] for row in csv_reader if row and row[0].strip()]

    additional_info_data = [
        [product_url] + scrape_product_info(product_url)
        for product_url in product_urls
    ]

    if additional_info_data:
        # Export the additional data to a CSV file
        with open('amazon_product_info.csv', 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['Product URL', 'Description', 'ASIN', 'Product Description', 'Manufacturer'])
            csv_writer.writerows(additional_info_data)

# Entry point: start the detail scrape when this code runs as the main program
# rather than being imported. `main` here is the definition directly above,
# which rebinds the name used by the earlier listing-scrape cell.
if __name__ == '__main__':
    main()


  asin_elem = soup.find('th', text='ASIN')
