# Part 1: Scraping Product Listings

In [92]:
pip install requests beautifulsoup4




In [2]:
import pandas as pd

# import the required libraries:

In [3]:
import requests
from bs4 import BeautifulSoup
import csv

# Create a function to scrape the product listings from a given URL:

In [120]:
def scrape_product_listings(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    product_listings = []
    
    # Extract product details from the webpage
    products = soup.find_all('div', {'data-component-type': 's-search-result'})
    for product in products:
        product_data = {}
        
        # Extract the product URL
        product_link = product.find('a', {'class': 'a-link-normal s-no-outline'})
        if product_link:
            product_data['URL'] = 'https://www.amazon.in' + product_link['href']
        
        # Extract the product name
        product_name = product.find('span', {'class': 'a-size-medium a-color-base a-text-normal'})
        if product_name:
            product_data['Name'] = product_name.text.strip()
        
        # Extract the product price
        product_price = product.find('span', {'class': 'a-offscreen'})
        if product_price:
            product_data['Price'] = product_price.text.strip()
        
        # Extract the product rating
        product_rating = product.find('span', {'class': 'a-icon-alt'})
        if product_rating:
            product_data['Rating'] = product_rating.text.strip().split()[0]
        
        # Extract the number of reviews
        product_reviews = product.find('span', {'class': 'a-size-base'})
        if product_reviews:
            product_data['Reviews'] = product_reviews.text.strip().replace(',', '')
        
        product_listings.append(product_data)
    
    return product_listings


# Create a function to scrape multiple pages of product listings:

In [121]:
def scrape_multiple_pages(base_url, num_pages):
    all_product_listings = []
    
    for page in range(1, num_pages + 1):
        url = f'{base_url}&page={page}'
        product_listings = scrape_product_listings(url)
        all_product_listings.extend(product_listings)
    
    return all_product_listings


# Call the function and save the scraped data to a CSV file:

In [122]:
import csv

base_url = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_'
num_pages = 20

product_listings = scrape_multiple_pages(base_url, num_pages)

# Get the keys from the first dictionary in product_listings
keys = product_listings[0].keys()

# Update keys to include 'Rating' if it is present in the dictionary
if 'Rating' not in keys:
    keys = list(keys) + ['Rating']

filename = 'product_listings.csv'
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, keys)
    writer.writeheader()
    writer.writerows(product_listings)


In [98]:
d=pd.read_csv('product_listings.csv')
d

Unnamed: 0,URL,Name,Price,Rating,Reviews
0,https://www.amazon.in/Wesley-Milestone-Waterpr...,Wesley Milestone 2.0 Casual Waterproof Laptop ...,₹598,4.3,10923
1,https://www.amazon.in/American-Tourister-AMT-S...,American Tourister 32 Ltrs Black Casual Backpa...,"₹1,199",4.0,54205
2,https://www.amazon.in/American-Tourister-BACKP...,American Tourister VALEX BLUE LAPTOP BACKPACK ...,"₹1,299",4.2,204
3,https://www.amazon.in/Number-Backpack-Compartm...,FUR JADEN Anti Theft Number Lock Backpack Bag ...,₹699,4.0,4024
4,https://www.amazon.in/ADISA-Laptop-Backpack-Of...,ADISA 15.6 inch Laptop Backpack Office Bag Col...,₹499,3.9,802
5,https://www.amazon.in/Number-Backpack-Compartm...,FUR JADEN Anti Theft Number Lock Backpack Bag ...,₹699,4.0,4024
6,https://www.amazon.in/Half-Moon-Waterproof-Bac...,Half Moon 35L Water Resistant 15.6 inch Laptop...,₹529,3.9,15926
7,https://www.amazon.in/Skybags-Brat-Black-Casua...,Skybags Brat Black 46 Cms Casual Backpack,₹669,4.1,4547
8,https://www.amazon.in/Half-Moon-Resistant-Back...,Half Moon Large 37L Laptop Bag Backpack for me...,₹749,3.9,2764
9,https://www.amazon.in/FUR-JADEN-Leatherette-Po...,Fur Jaden Brown Textured Leatherette Stylish &...,₹899,4.3,5925


# 






# Part 2: Scraping Product Details
    -- scrape_product_urls
    -- scrape_product_details

# Define a function to scrape the product URLs from the search result page

In [131]:
def scrape_product_urls(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the product URLs from the search result page
    product_urls = []
    products = soup.select('.s-result-item.s-asin')
    for product in products:
        product_url = product.select('.a-link-normal')[0]['href']
        product_urls.append('https://www.amazon.in' + product_url)

    return product_urls


In [132]:
def scrape_product_details(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the required details using appropriate CSS selectors or XPaths
    # Image
    image = soup.select('.s-image-fixed-height img')[0]['src']

    # Product Name
    name = soup.select('.a-size-medium.a-color-base.a-text-normal')[0].get_text().strip()

    # Manufacturer
    manufacturer = soup.select('.a-size-medium.a-color-base.a-text-normal')[1].get_text().strip()

    # Return the scraped details
    return image, name, manufacturer


# Iterate over multiple pages and scrape the required details

In [133]:
def scrape_product_urls(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the product URLs from the search result page
    product_urls = []
    products = soup.select('.s-result-item.s-asin')
    for product in products:
        # Image
        image = product.select('.s-image')[0]['src']

        # Product Name
        name = product.select('.a-size-medium.a-color-base.a-text-normal')[0].get_text().strip()

        # Rating
        rating = product.select('.a-icon.a-icon-star-small.a-star-small-4-5.aok-align-bottom')
        rating = rating[0].find('span', class_='a-icon-alt').get_text().strip() if rating else ''

        # Price
        price = product.select('.a-price-whole')[0].get_text().strip()

        # URL
        url = 'https://www.amazon.in' + product.select('.a-link-normal')[0]['href']

        # Append the product URL and extracted details to the list
        product_urls.append({
            'url': url,
            'image': image,
            'name': name,
            'rating': rating,
            'price': price
        })

    return product_urls


In [None]:
import csv

# Rest of the code for scraping and storing data
# ...

# Specify the file name for the CSV
csv_file = 'product_details.csv'

# Write the data to the CSV file
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Name', 'URL', 'Image', 'Rating', 'Price', 'Description', 'ASIN', 'Manufacturer'])

    for details in product_details:
        description, asin, product_description, manufacturer = scrape_product_details(details['url'])
        writer.writerow([
            details['name'],
            details['url'],
            details['image'],
            details['rating'],
            details['price'],
            description,
            asin,
            manufacturer
        ])

In [12]:
d=pd.read_csv('product_details.csv')
d

Unnamed: 0,Name,URL,Image,Rating,Price,Description,ASIN,Manufacturer
0,Elegant Sport Leatherette (16 inch 28 L Casual...,https://www.amazon.in/Elegant-Leatherette-Wate...,product1.jpg,Empty,₹699,This Casual laptop backpack is for Men and Wom...,B0C4PS5JSM,urban tribe
1,uppercase SealPro Professional Laptop Backpack...,https://www.amazon.in/uppercase-Professional-r...,product2.jpg,4.7 out of 5,₹1900,This Casual laptop backpack is for Men and Wom...,B0BR179L65,Acefour Accessories Pvt. Ltd.Acefour Accessori...
2,Red Lemon BANGE Multifunctional Waterproof Ant...,https://www.amazon.in/Red-Lemon-Multifunctiona...,Product3.jpg,4.3 out of 5,₹ 1899,This sling bag is made of high-quality water-r...,B0B12VQFVC,Encon Impex Pvt Ltd
3,HEROZ Hammer Unisex Nylon 45 L Travel Laptop B...,https://www.amazon.in/HEROZ-Hammer-Backpack-Re...,Product3.jpg,4.3 out of 5,₹1499,The padded base-back and shoulder straps provi...,B07M82M8G7,Heroz
4,FUR JADEN Smart Tech Water Repellent Anti-Thef...,https://www.amazon.in/FUR-JADEN-Repellent-Anti...,Product4.jpg,4.9 out of 5,₹1299,Stay organized all day with Fur Jaden's durabl...,B0C2J3DQTF,Fur Jaden
5,Lunar's Comet 35L Water Resistant Travel Bagpa...,https://www.amazon.in/Lunars-Comet-Resistant-C...,Product5.jpg,4.0 out of 5,₹ 849,Lunar Coment 35L Backpack is the ideal school/...,B07WNFN1XS,Lunar's
6,Red Lemon Unisex-adult Bange Series Rhombus Sh...,https://www.amazon.in/Red-Lemon-Unisex-adult-W...,Product6.jpg,4.4 out of 5,₹ 1999,This Red Lemon stylish chest bag is made of wa...,B09VGNTDRZ,Red lemon.
7,Cosmus Vogue Casual Laptop Backpack with USB C...,https://www.amazon.in/Cosmus-Casual-Backpack-C...,Product7.jpg,4.2 out of 5,₹1358,Stylish and Durable Spacious Laptop Backpack f...,B08D7D21FV,COSMUSBAGS PVT.LTD
8,"ARCTIC HUNTER 17"" Laptop Bag 32L (Model Bold) ...",https://www.amazon.in/Waterproof-Expandable-Bo...,Product8.jpg,4.5 out of 5,₹3636,We at Walkent strive to provide multifunction ...,B09T738CK1,Walkent (OEM Guangzhou Leading Wolf Leather Pr...
9,Gear CarryOn 16L Foldable Water Resistant Scho...,https://www.amazon.in/GEAR-Black-Blue-Backpack...,Product9 .jpg,3.7 out of 5,₹199,Gear bags are designed for the quick commute a...,B019HA8AYG,Gear Merchandise PVT LTD.
