Importing Necessary Libraries

In [31]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

In [41]:
# Search-results URL template; the single `{}` placeholder is filled with the
# page number via `base_url.format(page)`.
base_url = "https://www.amazon.in/s?k=mobiles&page={}"

# Request headers carrying a desktop Chrome (macOS) User-Agent string.
# The value is split across adjacent literals purely for line length.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/129.0.0.0 Safari/537.36"
    ),
}

Functions to scrape information

In [42]:
# Function to check for the next page
def has_next_page(soup):
    """Report whether the parsed results page links to a following page.

    Looks up the pagination container ``div`` and, inside it, the "next"
    anchor. Both lookups pass the full class strings below to ``find``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        True when both the container and the "next" anchor are found,
        otherwise False.
    """
    container_classes = 'a-section a-text-center s-pagination-container'
    next_link_classes = "s-pagination-item s-pagination-next s-pagination-button s-pagination-separator"

    container = soup.find('div', class_=container_classes)
    if not container:
        # No pagination block on this page at all.
        return False

    next_link = container.find('a', class_=next_link_classes)
    return bool(next_link)

# Function to extract delivery text
def get_delivery_text(del_info):
    """Return the stripped text of the first ``a-color-base`` span.

    Args:
        del_info: Delivery-info element exposing a BeautifulSoup-style
            ``find_all`` method.

    Returns:
        The first matching span's text with surrounding whitespace removed,
        or 'No delivery info' when nothing matches or an AttributeError is
        raised during the lookup (for example when ``del_info`` is None).
    """
    fallback = 'No delivery info'
    try:
        spans = del_info.find_all('span', class_='a-color-base')
        if not spans:
            return fallback
        first_span = spans[0]
        return first_span.text.strip()
    except AttributeError:
        return fallback

# Function to extract free delivery date
def get_free_delivery_date(del_info):
    """Return the stripped text of the first bold date span in ``del_info``.

    Args:
        del_info: Delivery-info element exposing a BeautifulSoup-style
            ``find`` method.

    Returns:
        Text of the span found with class string 'a-color-base a-text-bold',
        or 'No free delivery date info' when no span is found or an
        AttributeError is raised during the lookup.
    """
    missing = 'No free delivery date info'
    try:
        date_span = del_info.find('span', class_='a-color-base a-text-bold')
        if date_span:
            return date_span.text.strip()
        return missing
    except AttributeError:
        return missing

# Function to extract fastest delivery date
def get_fast_delivery_date(del_info):
    """Return the stripped text of the second bold date span in ``del_info``.

    The code treats the second 'a-color-base a-text-bold' span as the
    fastest-delivery date; the first one is read by a separate helper.

    Args:
        del_info: Delivery-info element exposing a BeautifulSoup-style
            ``find_all`` method.

    Returns:
        Text of the second matching span, or 'No fastest delivery date info'
        when fewer than two spans match or an AttributeError is raised.
    """
    missing = 'No fastest delivery date info'
    try:
        bold_spans = del_info.find_all('span', class_='a-color-base a-text-bold')
        if len(bold_spans) <= 1:
            # Only zero or one date present: there is no second entry to read.
            return missing
        second_span = bold_spans[1]
        return second_span.text.strip()
    except AttributeError:
        return missing

# Function to extract product title
def get_title(product):
    """Return the stripped text of the product's ``h2`` element.

    Args:
        product: Product element whose ``h2`` attribute holds the heading.

    Returns:
        The heading text without surrounding whitespace, or
        'No title available' when any attribute along the way is missing
        (an AttributeError is raised).
    """
    try:
        heading = product.h2
        raw_title = heading.text
        return raw_title.strip()
    except AttributeError:
        return 'No title available'

# Function to extract product price
def get_price(product):
    """Return the stripped text of the product's 'a-price-whole' span.

    Args:
        product: Product element exposing a BeautifulSoup-style ``find``
            method.

    Returns:
        The span's text without surrounding whitespace, or
        'Price not listed' when no span is found or an AttributeError is
        raised during the lookup.
    """
    not_listed = 'Price not listed'
    try:
        price_span = product.find('span', 'a-price-whole')
        if not price_span:
            return not_listed
        return price_span.text.strip()
    except AttributeError:
        return not_listed

# Function to extract product rating
def get_rating(product):
    """Return the stripped text of the product's 'a-icon-alt' span.

    Args:
        product: Product element exposing a BeautifulSoup-style ``find``
            method.

    Returns:
        The span's text without surrounding whitespace, or
        'Rating not available' when no span is found or an AttributeError
        is raised during the lookup.
    """
    unavailable = 'Rating not available'
    try:
        rating_span = product.find('span', {'class': 'a-icon-alt'})
        if rating_span:
            return rating_span.text.strip()
        return unavailable
    except AttributeError:
        return unavailable

# Function to extract the number of reviews
def get_reviews(product):
    """Return the stripped text of the product's first 'a-size-base' span.

    Args:
        product: Product element exposing a BeautifulSoup-style ``find``
            method.

    Returns:
        The span's text without surrounding whitespace, or 'No reviews'
        when no span is found or an AttributeError is raised during the
        lookup.
    """
    no_reviews = 'No reviews'
    try:
        review_span = product.find('span', {'class': 'a-size-base'})
        if not review_span:
            return no_reviews
        count_text = review_span.text
        return count_text.strip()
    except AttributeError:
        return no_reviews

# Function to extract availability
def get_availability(product):
    """Return the stripped secondary-colour text inside the product's micro-spaced section.

    Two nested lookups are performed: first the ``div`` with class string
    'a-section a-spacing-none a-spacing-top-micro', then, inside it, the
    ``span`` with class string 'a-size-base a-color-secondary'.

    Args:
        product: Product element exposing a BeautifulSoup-style ``find``
            method.

    Returns:
        The inner span's text without surrounding whitespace, or
        'Availability not listed' when either element is missing or an
        AttributeError is raised during the lookup.
    """
    not_listed = 'Availability not listed'
    try:
        section = product.find('div', class_='a-section a-spacing-none a-spacing-top-micro')
        if not section:
            return not_listed

        label = section.find('span', class_='a-size-base a-color-secondary')
        if not label:
            return not_listed

        return label.text.strip()
    except AttributeError:
        return not_listed

Scraping and writing the data to a CSV file

In [43]:
# Scrape every search-results page and stream the rows into a CSV file.
#
# The loop requests page 1, 2, 3, ... and stops when has_next_page() reports
# no following page, when the server answers with a non-200 status, or when
# the request itself fails (timeout, connection error, ...).

# Seconds to wait for the server before giving up on a page. Without a
# timeout, requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

with open('amazon_mobiles.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the headers
    writer.writerow(['Product Title', 'Price', 'Rating', 'Number of Reviews', 'Availability', 'Delivery', 'Free Delivery Date', 'Fast Delivery Date'])

    # Initialize page number
    page = 1

    while True:
        print(f"Scraping page {page}...")

        # Construct the full URL for the current page
        url = base_url.format(page)

        # Send request to the webpage; network-level failures are reported
        # and end the crawl instead of aborting the cell with a traceback.
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to retrieve page {page}: {exc}")
            break  # Exit the loop on error

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve page {page}, status code: {response.status_code}")
            break  # Exit the loop on error

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all product listings
        products = soup.find_all('div', {'data-component-type': 's-search-result'})

        # Loop through each product and extract information
        for product in products:
            title = get_title(product)
            price = get_price(product)
            rating = get_rating(product)
            reviews = get_reviews(product)
            availability = get_availability(product)

            # Extract delivery info; listings without the delivery row get
            # explicit placeholder values.
            del_info = product.find('div', class_='a-row a-size-base a-color-secondary s-align-children-center')
            if del_info:
                delivery_text = get_delivery_text(del_info)
                free_delivery_date = get_free_delivery_date(del_info)
                fast_delivery_date = get_fast_delivery_date(del_info)
            else:
                delivery_text = 'Delivery info not available'
                free_delivery_date = 'No info'
                fast_delivery_date = 'No info'

            # Write the data to the CSV file
            writer.writerow([title, price, rating, reviews, availability, delivery_text, free_delivery_date, fast_delivery_date])

        # Check if there is a next page
        if not has_next_page(soup):
            print("No more pages to scrape.")
            break  # Exit the loop if no next page

        # Increment the page number
        page += 1

print("Scraping complete! Data saved to 'amazon_mobiles.csv'.")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
No more pages to scrape.
Scraping complete! Data saved to 'amazon_mobiles.csv'.


In [44]:
# Load the rows written by the scraping cell back into a DataFrame.
df = pd.read_csv("amazon_mobiles.csv")

In [45]:
# Preview the first 15 scraped rows.
df.head(n=15)

Unnamed: 0,Product Title,Price,Rating,Number of Reviews,Availability,Delivery,Free Delivery Date,Fast Delivery Date
0,Apple iPhone 16 (256 GB) - White,89900,4.7 out of 5 stars,14,200+ bought in past month,FREE delivery,"Sat, 12 Oct",No fastest delivery date info
1,Apple iPhone 13 (256 GB) - Green,53999,4.5 out of 5 stars,29534,1K+ bought in past month,FREE delivery,"Tue, 8 Oct","Tomorrow, 5 Oct"
2,"Samsung Galaxy M05 (Mint Green, 4GB RAM, 64 GB...",6499,3.9 out of 5 stars,50,5K+ bought in past month,FREE delivery,"Mon, 7 Oct","Tomorrow, 5 Oct"
3,"realme NARZO N61 (Voyage Blue,6GB RAM+128GB St...",8498,4.1 out of 5 stars,338,5K+ bought in past month,FREE delivery,"Mon, 7 Oct","Tomorrow, 5 Oct"
4,Samsung Galaxy M15 5G Prime Edition (Celestial...,10999,4.1 out of 5 stars,18,5K+ bought in past month,FREE delivery,"Sun, 13 Oct",No fastest delivery date info
5,"realme NARZO N61 (Marble Black,6GB RAM+128GB S...",8498,4.1 out of 5 stars,338,5K+ bought in past month,FREE delivery,"Sat, 12 Oct",No fastest delivery date info
6,"realme NARZO N61 (Voyage Blue,4GB RAM+64GB Sto...",Price not listed,4.1 out of 5 stars,338,5K+ bought in past month,Delivery info not available,No info,No info
7,"Redmi 13C 5G (Startrail Silver, 4GB RAM, 128GB...",8999,4.0 out of 5 stars,8064,5K+ bought in past month,FREE delivery,"Sun, 6 Oct","Tomorrow, 5 Oct"
8,"Redmi 13C (Starfrost White, 6GB RAM, 128GB Sto...",8499,4.1 out of 5 stars,7962,5K+ bought in past month,FREE delivery,"Tue, 8 Oct","Tomorrow, 5 Oct"
9,"realme NARZO N65 5G (Amber Gold 6GB RAM, 128GB...",12498,4.1 out of 5 stars,900,5K+ bought in past month,FREE delivery,"Mon, 7 Oct","Tomorrow, 5 Oct"
