In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define the URL for the search results (Amazon search page for smart locks)
url = "https://www.amazon.in/s?k=smart+lock"

# Make a request to the page
headers = {"User-Agent": "your-user-agent"}  # Replace 'your-user-agent' with your actual User-Agent string
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content, "html.parser")

# Initialize lists to store scraped data
brands, prices, ratings, rating_counts, review_counts, ranks, urls = [], [], [], [], [], [], []

# Find the relevant HTML tags and scrape the data (this depends on the website’s structure)
products = soup.find_all("div", class_="s-main-slot s-result-list s-search-results sg-row")

# Scrape data for each product on the page
for i, product in enumerate(products, start=1):
    try:
        # Extract brand name
        brand = product.find("span", class_="a-size-medium").text if product.find("span", class_="a-size-medium") else "N/A"
        
        # Extract price (remove commas and handle cases where price is not available)
        price = product.find("span", class_="a-price-whole")
        if price:
            price = int(price.text.replace(",", ""))
        else:
            price = 0  # Set price to 0 if not available
        
        # Extract rating (handle cases where rating is missing)
        rating = product.find("span", class_="a-icon-alt")
        if rating:
            rating = float(rating.text.split()[0])
        else:
            rating = 0.0  # Set rating to 0 if not available
        
        # Extract rating count (handle missing or non-numeric values)
        rating_count = product.find("span", class_="a-size-base")
        if rating_count:
            try:
                rating_count = int(rating_count.text.replace(",", ""))
            except ValueError:
                rating_count = 0  # Set to 0 if it's not a number
        else:
            rating_count = 0
        
        # Extract the product URL
        url = product.find("a", class_="a-link-normal")["href"]
        
        # Append the scraped data to lists
        brands.append(brand)
        prices.append(price)
        ratings.append(rating)
        rating_counts.append(rating_count)
        ranks.append(i)  # Rank is the position in the search results
        urls.append("https://www.amazon.in" + url)
    
    except Exception as e:
        print(f"Error occurred while scraping product {i}: {e}")
        continue

# Create a DataFrame for the scraped data
df = pd.DataFrame({
    'Brand': brands,
    'Price': prices,
    'Rating': ratings,
    'Rating Count': rating_counts,
    'Rank': ranks,
    'URL': urls
})

# Save the DataFrame to a CSV file
df.to_csv("smart_lock_data.csv", index=False)

print("Scraping completed. Data saved to smart_lock_data.csv")


Scraping completed. Data saved to smart_lock_data.csv


In [3]:
brand_counts = df['Brand'].value_counts()
print(brand_counts)

Series([], Name: Brand, dtype: int64)


In [5]:
df['Price Band'] = pd.cut(df['Price'], bins=[0, 4999, 9999, 14999, 19999, float('inf')], labels=['<4999', '5000-9999', '10000-14999', '15000-19999', '>20000'])
price_distribution = df['Price Band'].value_counts()
print(price_distribution)

<4999          0
5000-9999      0
10000-14999    0
15000-19999    0
>20000         0
Name: Price Band, dtype: int64
