In [44]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [45]:
#Function to extract Product Title 
def get_title(soup):
    """Return the product title found in ``soup``, stripped of whitespace.

    Looks up the ``span`` element whose id is ``productTitle`` and returns
    its text with leading/trailing whitespace removed.

    Returns an empty string when the lookup raises ``AttributeError``,
    which is what happens when ``find`` returns ``None`` because the
    element is absent.
    """
    try:
        return soup.find("span", attrs={"id": "productTitle"}).text.strip()
    except AttributeError:
        # No title element on this page.
        return ""


#Function to extract Product Price 
def get_price(soup):
    """Return the product price found in ``soup`` as a stripped string.

    The regular price (``span#priceblock_ourprice``) is tried first and the
    deal price (``span#priceblock_dealprice``) second. The first lookup
    that yields a string wins.

    Returns an empty string when neither element yields a string.

    Only ``AttributeError`` is treated as "price not present": it is raised
    when ``find`` returns ``None`` or the element's ``.string`` is ``None``.
    Any other exception propagates instead of being silently swallowed.
    """
    # Ordered by priority: regular price first, then the deal price.
    price_ids = ('priceblock_ourprice', 'priceblock_dealprice')

    for price_id in price_ids:
        try:
            return soup.find("span", attrs={'id': price_id}).string.strip()
        except AttributeError:
            # Element missing or without a single string child; try the next id.
            continue

    return ""

#Function to extract Product Rating 
def get_rating(soup):
    """Return the product rating text found in ``soup``, stripped.

    Two lookups are attempted in order:

    1. ``<i class="a-icon a-icon-star a-star-4-5">``
    2. ``<span class="a-icon-alt">``

    The first one that yields a string wins. Returns an empty string when
    neither does.

    Only ``AttributeError`` is treated as "rating not present": it is raised
    when ``find`` returns ``None`` or the element's ``.string`` is ``None``.
    Any other exception propagates.
    """
    # NOTE(review): the first selector hard-codes the 4.5-star icon class, so
    # for other ratings the result presumably comes from the second selector.
    selectors = (
        ("i", {'class': 'a-icon a-icon-star a-star-4-5'}),
        ("span", {'class': 'a-icon-alt'}),
    )

    for tag_name, attrs in selectors:
        try:
            return soup.find(tag_name, attrs=attrs).string.strip()
        except AttributeError:
            # Element missing or without a single string child; try the next one.
            continue

    return ""

#Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the review-count text found in ``soup``, stripped.

    Reads the ``span`` element whose id is ``acrCustomerReviewText``.
    Returns an empty string when the lookup raises ``AttributeError``
    (element absent, or its ``.string`` is ``None``).
    """
    try:
        review_tag = soup.find("span", attrs={"id": "acrCustomerReviewText"})
        review_text = review_tag.string
        return review_text.strip()
    except AttributeError:
        return ""

#Function to extract Available Status
def get_availability(soup):
    """Return the availability text found in ``soup``, stripped.

    Finds the ``div`` whose id is ``availability`` and reads the string of
    the ``span`` found inside it. Returns ``"Not Available"`` when any step
    of that lookup raises ``AttributeError`` (container or span absent, or
    the span's ``.string`` is ``None``).
    """
    try:
        container = soup.find("div", attrs={"id": "availability"})
        status_span = container.find("span")
        return status_span.string.strip()
    except AttributeError:
        return "Not Available"

#Function to get the description of the product
def get_descriptionAboutProduct(soup):
    """Return the stripped string of the ``span.a-list-item`` found in ``soup``.

    Returns an empty string when the lookup raises ``AttributeError``
    (element absent, or its ``.string`` is ``None``).
    """
    try:
        list_item = soup.find("span", attrs={"class": "a-list-item"})
        text = list_item.string.strip()
    except AttributeError:
        return ""
    else:
        return text

In [46]:
if __name__ == '__main__':
    # Request headers sent with every HTTP call (replace the user agent with
    # your own if needed).
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    # Seconds to wait for a response; without a timeout a stalled connection
    # would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    # Column store shared by ALL result pages, so the CSV written at the end
    # holds every scraped product rather than only those of the last page.
    d = {
        "title": [],
        "price": [],
        "rating": [],
        "reviews": [],
        "availability": [],
        "descriptionAboutProduct": []
    }

    for i in range(2, 5):
        # The search-results URL. This must be an f-string so that the page
        # number and the "ref" suffix are actually substituted.
        URL = f"https://www.amazon.in/s?k=electronics&page={i}&crid=1WNSB3EW4IG2F&qid=1726035644&sprefix=electronic%2Caps%2C443&ref=sr_pg_{i-1}"

        # HTTP request for the results page; skip the page on network failure
        # so that rows already collected are still written out.
        try:
            webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Skipping results page {i}: {exc}")
            continue

        # Soup object containing all data
        soup = BeautifulSoup(webpage.content, "html.parser")

        # Fetch links as a list of Tag objects
        links = soup.find_all("a", attrs={'class': 'a-link-normal s-no-outline'})

        # Keep only the anchors that actually carry an href.
        links_list = [href for href in (link.get('href') for link in links) if href]

        # Loop for extracting product details from each link
        for link in links_list:
            try:
                new_webpage = requests.get(
                    "https://www.amazon.in" + link,
                    headers=HEADERS,
                    timeout=REQUEST_TIMEOUT,
                )
            except requests.RequestException as exc:
                print(f"Skipping product {link}: {exc}")
                continue

            new_soup = BeautifulSoup(new_webpage.content, "html.parser")

            # Every column must receive exactly one value per product;
            # DataFrame construction fails on columns of unequal length.
            d['title'].append(get_title(new_soup))
            d['price'].append(get_price(new_soup))
            d['rating'].append(get_rating(new_soup))
            d['reviews'].append(get_review_count(new_soup))
            d['availability'].append(get_availability(new_soup))
            d['descriptionAboutProduct'].append(get_descriptionAboutProduct(new_soup))

    amazonElectronic_df = pd.DataFrame.from_dict(d)

    # get_title() reports a missing title as "", which dropna() does not
    # treat as missing; convert it to NaN first so those rows are removed.
    amazonElectronic_df['title'] = amazonElectronic_df['title'].replace('', np.nan)
    amazonElectronic_df = amazonElectronic_df.dropna(subset=['title'])
    amazonElectronic_df.to_csv("amazonElectronicdata.csv", header=True, index=False)