In [36]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import requests
import pandas as pd
import numpy as np
import time

In [37]:
# Function to extract Product Title
def get_title(soup):
    """Return the product title found on a parsed product page.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The text of the ``<span id="productTitle">`` element with
        surrounding whitespace removed, or ``""`` when the lookup raises
        ``AttributeError`` (e.g. ``find`` returned ``None``).
    """
    try:
        heading = soup.find("span", attrs={"id":'productTitle'})
        # .text on a missing element (None) raises AttributeError, which is
        # the signal used here for "no title on this page".
        return heading.text.strip()
    except AttributeError:
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the displayed product price as stripped text.

    Args:
        soup: Parsed page object exposing ``select_one(css_selector)``.

    Returns:
        The text of the first element matching
        ``span.a-price > span.a-offscreen`` with surrounding whitespace
        removed, or ``""`` when nothing matches or the lookup fails.
    """
    try:
        price = soup.select_one("span.a-price > span.a-offscreen")
        if price:
            return price.text.strip()
        return ""
    except Exception:
        # Best-effort: any ordinary lookup failure yields an empty price.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long scraping run can still be stopped.
        return ""


# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text from a parsed product page.

    Two lookups are tried in order:

    1. ``<i class="a-icon a-icon-star a-star-4-5">`` - note this class
       string is specific to the 4.5-star icon.
    2. ``<span class="a-icon-alt">`` - used whenever the first lookup
       raises ``AttributeError`` (element missing or ``.string`` is None).

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped rating string, or ``""`` if neither lookup succeeds.
    """
    try:
        return soup.find("i", attrs={'class':'a-icon a-icon-star a-star-4-5'}).string.strip()
    except AttributeError:
        pass

    try:
        return soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
    except Exception:
        # Best-effort fallback. Exception (rather than a bare except) keeps
        # KeyboardInterrupt / SystemExit from being silently swallowed.
        return ""

# Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the review-count text shown on a parsed product page.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped ``.string`` of ``<span id="acrCustomerReviewText">``,
        or ``""`` when the lookup raises ``AttributeError`` (element
        missing, or its ``.string`` is ``None``).
    """
    try:
        counter = soup.find("span", attrs={'id':'acrCustomerReviewText'})
        return counter.string.strip()
    except AttributeError:
        return ""

# Function to extract Availability Status
def get_availability(soup):
    """Return the availability text shown on a parsed product page.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped ``.string`` of the first ``<span>`` inside
        ``<div id="availability">``, or ``"Not Available"`` when any step
        of that lookup raises ``AttributeError``.
    """
    try:
        container = soup.find("div", attrs={'id':'availability'})
        # A missing div/span (None) or a span whose .string is None all
        # surface as AttributeError and fall through to the default below.
        return container.find("span").string.strip()
    except AttributeError:
        return "Not Available"

In [39]:
if __name__ == '__main__':
    # Scrape one search-results page, visit every product link found on it,
    # collect title / price / rating / reviews / availability, clean the
    # values and write them to "amazon_data.csv".

    # add your user agent 
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # The webpage URL
    URL = "https://www.amazon.in/s?k=laptop&i=electronics&crid=1MGERN7C6LYFR&sprefix=lap%2Celectronics%2C669&ref=nb_sb_noss_1"

    # Seconds to wait for a server response. Without a timeout, requests
    # waits indefinitely and a single stalled connection hangs the cell.
    REQUEST_TIMEOUT = 30

    # HTTP Request for the search-results page
    webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

    # Store the links. Anchors without an href are skipped: urljoin() would
    # turn a missing href into the bare site URL and waste a request.
    links_list = [href for href in (link.get('href') for link in links) if href]

    d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}
    
    # Loop for extracting product details from each link 
    for link in links_list:
        product_url = urljoin("https://www.amazon.in", link)
        product_url = product_url.split("/ref=")[0] # clean URL

        try:
            new_webpage = requests.get(product_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as exc:
            # One unreachable product page should not discard the rows that
            # were already collected; report it and move on.
            print(f"Skipping {product_url}: {exc}")
            time.sleep(2)   # keep the same pacing between requests
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Function calls to collect all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))

        time.sleep(2)   # prevent blocking

    
    amazon_df = pd.DataFrame.from_dict(d)
    # ---------- CLEANING ----------

    # Empty titles become NaN so those rows can be dropped below.
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)

    # Repair a mis-decoded rupee sign and strip thousands separators so the
    # remaining text can be parsed as a number.
    amazon_df['price'] = (
        amazon_df['price']
        .str.replace('â‚¹', '₹', regex=False)
        .str.replace(',', '', regex=False)
    )

    # Re-format non-empty prices as "₹" + number with two decimals and
    # comma grouping; empty strings are left untouched.
    amazon_df['price'] = amazon_df['price'].apply(
        lambda x: f"₹{float(x.replace('₹','')):,.2f}" if x else x
    )

    # Remove commas and hyphens from the review-count text.
    amazon_df['reviews'] = (
        amazon_df['reviews']
        .str.replace(',', '', regex=False)
        .str.replace('-', '', regex=False)
    )

    # Keep only the leading number of e.g. "4.3 out of 5 stars".
    amazon_df['rating'] = (
        amazon_df['rating']
        .str.replace(' out of 5 stars', '', regex=False)
    )

    amazon_df = amazon_df.dropna(subset=['title'])

    # save CSV (₹ symbol safe for Excel)
    amazon_df.to_csv("amazon_data.csv", index=False, encoding="utf-8-sig")

In [40]:
# Show the dictionary of scraped column lists filled by the scraping loop.
d

{'title': ['Lenovo V14 Intel Core i3 13th Gen 14" FHD (1920x1080) Antiglare 250 Nits Thin and Light Laptop (8GB RAM/512GB SSD/Windows 11 Home/Office Home 2024/Iron Grey/1.43 kg), 83A0A0PBIN',
  'Lenovo V15 G4 AMD Ryzen 5 7520U 15.6 inch FHD Laptop, AMD Graphics, 16GB DDR5 5500Mhz Ram, 512GB SSD NVMe, Windows 11, Dolby Audio, Arctic Grey, 1 Year Onsite Brand Warranty',
  'BrowseBook 14.1" FHD IPS Laptop | Best Student & Office Work Laptop | Celeron N4020 | 4GB RAM | 128GB SSD | Windows 11 | 38Wh | 1.3kg | Grey',
  'acer Aspire Lite, AMD Ryzen 3 5300U Processor, 16 GB RAM, 512GB SSD, Full HD, 15.6"/39.62cm, Windows 11 Home, Steel Gray, 1.59KG, AL15-41, Metal Body, Premium Thin and Light Laptop',
  'acer [SmartChoice Aspire 3 Laptop Intel Core Celeron N4500 Processor Laptop (8 GB LPDDR4X SDRAM/512 GB SSD/Win11 Home/38 WHR/HD Webcam) A325-45 with 39.63 cm (15.6") HD Display, Pure Silver, 1.5 KG',
  'Primebook 2 Max 2025 (New Launch) | 8GB RAM, 256GB UFS Storage | 15.6-Inch Full HD IPS Disp

In [41]:
# Show the cleaned DataFrame built from the scraped values.
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
0,"Lenovo V14 Intel Core i3 13th Gen 14"" FHD (192...","₹35,990.00",3.8,(56),In stock
1,Lenovo V15 G4 AMD Ryzen 5 7520U 15.6 inch FHD ...,"₹38,999.00",4.3,(313),In stock
2,"BrowseBook 14.1"" FHD IPS Laptop | Best Student...","₹12,090.00",4.7,(60),
3,"acer Aspire Lite, AMD Ryzen 3 5300U Processor,...","₹30,990.00",3.9,(2070),In stock
4,acer [SmartChoice Aspire 3 Laptop Intel Core C...,"₹24,490.00",3.4,(818),In stock
5,"Primebook 2 Max 2025 (New Launch) | 8GB RAM, 2...","₹21,590.00",4.4,(369),In stock
6,Lenovo Yoga Slim 7 (Smartchoice) Intel Core Ul...,"₹114,990.00",4.4,(16),In stock
7,"Lenovo Smartchoice Yoga Slim 7 Aura Edition, I...","₹147,990.00",4.2,(48),Only 1 left in stock.
8,"Lenovo ThinkBook 16, AMD Ryzen 7 7735HS, 16GB ...","₹54,490.00",4.0,(40),
9,"Primebook 2 Pro 2025 (New Launch) | 8GB RAM, 1...","₹18,590.00",4.4,(248),In stock


In [29]:
# Keep only the five product columns, in this order, then display the result.
amazon_df = amazon_df[['title', 'price', 'rating', 'reviews', 'availability']]
amazon_df


Unnamed: 0,title,price,rating,reviews,availability
0,"Lenovo V14 Intel Core i3 13th Gen 14"" FHD (192...","₹35,990.00",3.8,(56),In stock
1,Lenovo V15 G4 AMD Ryzen 5 7520U 15.6 inch FHD ...,"₹38,999.00",4.3,(313),In stock
2,"BrowseBook 14.1"" FHD IPS Laptop | Best Student...","₹12,090.00",4.7,(60),
3,"HP 15, 13th Gen Intel Core i5-1334U (16GB DDR4...","₹56,990.00",3.9,(766),In stock
4,DeII Latitude 5410 InteI Core i5 10th Gen | 8G...,"₹24,299.00",1.0,(1),In stock
5,"Primebook 2 Max 2025 (New Launch) | 8GB RAM, 2...","₹21,590.00",4.4,(369),In stock
6,Lenovo Yoga Slim 7 (Smartchoice) Intel Core Ul...,"₹1,14,990.00",4.4,(16),In stock
7,"Lenovo Smartchoice Yoga Slim 7 Aura Edition, I...","₹1,47,990.00",4.2,(48),Only 1 left in stock.
8,"Lenovo ThinkBook 16, AMD Ryzen 7 7735HS, 16GB ...","₹54,490.00",4.0,(40),
9,"Primebook 2 Pro 2025 (New Launch) | 8GB RAM, 1...","₹18,590.00",4.4,(248),In stock


In [35]:
# Plain-text preview of the first rows of the cleaned DataFrame.
print(amazon_df.head())


                                               title       price rating  \
0  Lenovo V14 Intel Core i3 13th Gen 14" FHD (192...  ₹35,990.00    3.8   
1  Lenovo V15 G4 AMD Ryzen 5 7520U 15.6 inch FHD ...  ₹38,999.00    4.3   
2  BrowseBook 14.1" FHD IPS Laptop | Best Student...  ₹12,090.00    4.7   
3  acer Aspire Lite, AMD Ryzen 3 5300U Processor,...  ₹30,990.00    3.9   
4  HP 14s Intel Celeron Dual Core N4500 Laptop (8...  ₹26,490.00    4.1   

  reviews availability  
0    (56)     In stock  
1   (313)     In stock  
2    (60)               
3  (2070)     In stock  
4    (10)     In stock  
