In [68]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time
import random

In [69]:
# Function to extract Product Title
def get_title(soup):
    """Return the text of the ``<span id="productTitle">`` element.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)``
        Parsed product page (a ``BeautifulSoup`` object in this notebook).

    Returns
    -------
    str or float
        The element's ``.text`` exactly as stored (no stripping), or
        ``np.nan`` when the lookup raises ``AttributeError`` -- for example
        because ``find`` returned ``None``.
    """
    title_selector = {"id": "productTitle"}
    try:
        return soup.find("span", attrs=title_selector).text
    except AttributeError:
        # No such span (or nothing usable was passed in): record a missing value.
        return np.nan


In [70]:
# Function to extract Product Price (current after discount)
def get_Currentprice(soup):
    """Return the text of the first ``<span class="a-price-whole">`` element.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)``
        Parsed product page (a ``BeautifulSoup`` object in this notebook).

    Returns
    -------
    str or float
        The raw ``.text`` of the matched span (returned unmodified, so any
        separators present in the page are kept), or ``np.nan`` when the
        lookup raises ``AttributeError``.
    """
    price_selector = {"class": "a-price-whole"}
    try:
        return soup.find("span", attrs=price_selector).text
    except AttributeError:
        # find() produced nothing with a .text attribute: record a missing value.
        return np.nan

In [71]:
# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text, trying two selectors in order.

    The first selector is an ``<i>`` tag whose class attribute is the exact
    string ``'a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big'``.
    NOTE(review): that string hard-codes ``a-star-4-5``, so it presumably only
    matches one specific star value -- confirm against live pages. When it
    yields nothing, the first ``<span class="a-icon-alt">`` is used instead.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)``
        Parsed product page (a ``BeautifulSoup`` object in this notebook).

    Returns
    -------
    str or float
        The ``.text`` of the first selector that matches, or ``np.nan`` when
        both lookups raise ``AttributeError``.

    Notes
    -----
    Only ``AttributeError`` is treated as "not found", matching the other
    extractors in this notebook. Any other exception (including
    ``KeyboardInterrupt``) propagates instead of being silently turned into
    ``np.nan``.
    """
    selectors = (
        ("i", 'a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big'),
        ("span", 'a-icon-alt'),
    )
    for tag_name, css_class in selectors:
        try:
            return soup.find(tag_name, attrs={"class": css_class}).text
        except AttributeError:
            # This selector matched nothing; fall through to the next one.
            continue
    return np.nan

In [72]:
# Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the text of the ``<span id="acrCustomerReviewText">`` element.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)``
        Parsed product page (a ``BeautifulSoup`` object in this notebook).

    Returns
    -------
    str or float
        The raw ``.text`` of the matched span, or ``np.nan`` when the lookup
        raises ``AttributeError``.
    """
    reviews_selector = {"id": "acrCustomerReviewText"}
    try:
        return soup.find("span", attrs=reviews_selector).text
    except AttributeError:
        # Span not present on this page: record a missing value.
        return np.nan

In [73]:
# Function to extract Product Price (original before discount)

def get_Originalprice(soup):
    """Return the text nested inside the ``a-price a-text-price`` span.

    The lookup is two-step: find the first ``<span>`` whose class attribute
    is ``'a-price a-text-price'``, then, inside it, the first
    ``<span class="a-offscreen">``.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)``
        Parsed product page (a ``BeautifulSoup`` object in this notebook).

    Returns
    -------
    str or float
        The inner span's raw ``.text``, or ``np.nan`` when either step
        raises ``AttributeError`` (e.g. one of the spans is absent).
    """
    try:
        price_container = soup.find("span", attrs={"class": "a-price a-text-price"})
        offscreen_label = price_container.find("span", attrs={"class": "a-offscreen"})
        return offscreen_label.text
    except AttributeError:
        # Either the outer or the inner span was not found.
        return np.nan

In [74]:
# Function to extract Total off percentage

def get_OffPercentage(soup):
    """Return the text of the savings-percentage span.

    The span is located by an exact match on its full class attribute string
    (see ``savings_class`` below).

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)``
        Parsed product page (a ``BeautifulSoup`` object in this notebook).

    Returns
    -------
    str or float
        The raw ``.text`` of the matched span, or ``np.nan`` when the lookup
        raises ``AttributeError``.
    """
    savings_class = 'a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage'
    try:
        return soup.find("span", attrs={"class": savings_class}).text
    except AttributeError:
        # No savings badge found on this page: record a missing value.
        return np.nan

In [None]:
if __name__ == '__main__':

    # Headers sent with every request; replace the User-Agent with your own.
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # Stop collecting product links once this many have been gathered.
    MAX_LINKS = 100
    # Seconds to wait on each HTTP call. Without an explicit timeout,
    # requests can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # ---- Step 1: collect product links from the search result pages ----
    links_list = []

    for page in range(1, 6):

        URL = f"https://www.amazon.in/s?k=laptops+under+80k&page={page}&xpid=nL0bZw85pLnmX&crid=3RV63LQTD6FH5&qid=1761040929&sprefix=%2Caps%2C215&ref=sr_pg_1"
        webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(webpage.content, "html.parser")

        links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

        for link in links:
            href = link.get('href')
            # Keep only site-relative hrefs; they are prefixed with the host below.
            if href and href.startswith('/'):
                links_list.append(href)
                if len(links_list) >= MAX_LINKS:
                    break

        # Random pause between search-page requests.
        time.sleep(random.uniform(2, 5))

        if len(links_list) >= MAX_LINKS:
            break

    # ---- Step 2: visit each product page and extract its fields ----
    d = {"title":[], "current price":[], "original price":[], "rating":[], "reviews":[], "off percentage":[]}

    for link in links_list:
        product = "https://www.amazon.in" + link
        try:
            new_webpage = requests.get(product, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # One unreachable product must not discard every row gathered so far.
            print(f"Skipping {product}: {error}")
            new_webpage = None
        # Random pause between product-page requests (also after a failure).
        time.sleep(random.uniform(1, 4))
        if new_webpage is None:
            continue
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Each extractor returns np.nan when its element is missing, so all
        # six lists stay the same length.
        d['title'].append(get_title(new_soup))
        d['current price'].append(get_Currentprice(new_soup))
        d['original price'].append(get_Originalprice(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['off percentage'].append(get_OffPercentage(new_soup))

    # ---- Step 3: build the frame, report missing values, drop incomplete rows ----
    amazon_df = pd.DataFrame.from_dict(d)
    print(amazon_df.isna().sum())
    amazon_df = amazon_df.dropna()

title             0
current price     0
original price    1
rating            0
reviews           0
off percentage    2
dtype: int64


In [None]:
# Remove the rupee symbol and thousands separators, then convert to float.
# Casting to str first keeps this cell re-runnable: after one execution the
# column already holds floats, and the .str accessor only accepts string data.
amazon_df['original price'] = (
    amazon_df['original price']
    .astype(str)
    .str.replace('₹', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)
# Persist the cleaned table without the index column.
amazon_df.to_csv("amazon_data.csv", header=True, index=False)

# Only the last expression of a cell is rendered, so links_list is what shows.
amazon_df
links_list

Unnamed: 0,title,current price,original price,rating,reviews,off percentage
0,"Apple 2025 MacBook Air (13-inch, Apple...",96990.,99900.0,4.6 out of 5 stars,620 ratings,-3%
1,"HP Victus, AMD Ryzen 7 7445HS, 6GB RTX...",79990.,95746.0,3.8 out of 5 stars,232 ratings,-16%
2,"Lenovo LOQ, Intel Core i5 13th Gen 134...",86440.,126990.0,4.5 out of 5 stars,22 ratings,-32%
3,"HP Pavilion x360, 13th Gen Intel Core ...",68990.,99019.0,3.7 out of 5 stars,253 ratings,-30%
4,"ASUS Gaming V16 (2025) 14th Gen,Intel ...",84990.,113990.0,4.3 out of 5 stars,29 ratings,-25%
...,...,...,...,...,...,...
95,"HP Victus Gaming Laptop, 12th Gen Inte...",82650.,125000.0,5.0 out of 5 stars,1 rating,-34%
96,"Apple 2025 MacBook Air (15-inch, Apple...",139990.,144900.0,4.6 out of 5 stars,65 ratings,-3%
97,Apple 2024 MacBook Air 15″ Laptop with...,167990.,174900.0,5.0 out of 5 stars,1 rating,-4%
98,"HP Victus Gaming Laptop, 12th Gen Inte...",79560.,89999.0,4.7 out of 5 stars,8 ratings,-12%
