In [31]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [32]:
# Define functions to fetch product title, price, rating etc

# Function to extract product title
def product_title(soup):
    """Return the product title text from a parsed product page.

    Looks up the ``<span id="productTitle">`` element and returns its
    ``.string`` with surrounding whitespace stripped.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup
        document).

    Returns
    -------
    str
        The stripped title, or ``""`` when the element is missing or its
        ``.string`` is ``None`` (both surface as ``AttributeError``).
    """
    try:
        title_tag = soup.find("span", attrs={'id': 'productTitle'})
        return title_tag.string.strip()
    except AttributeError:
        # Missing tag or tag without a single text child.
        return ""

# Function to extract product price
def product_price(soup):
    """Return the product price as a string without symbol or commas.

    Finds the "price to pay" ``<span>`` and, inside it, the
    ``a-offscreen`` ``<span>`` holding the price text. The first character
    of the stripped text is dropped (the code treats it as the currency
    symbol) and every ``,`` is removed.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup
        document).

    Returns
    -------
    str
        The cleaned price text, or ``""`` when any element along the path
        is missing or has no ``.string``.
    """
    price_box_class = 'a-price aok-align-center reinventPricePriceToPayMargin priceToPay'
    try:
        price_box = soup.find("span", attrs={'class': price_box_class})
        offscreen = price_box.find("span", attrs={'class': 'a-offscreen'})
        displayed = offscreen.string.strip()
        # Skip the leading symbol, then remove thousands separators.
        return displayed[1:].replace(',', '')
    except AttributeError:
        return ""

# Function to extract product star rating
def product_star_rating(soup):
    """Return the first three characters of the star-rating text.

    Reads the ``a-icon-alt`` ``<span>`` inside
    ``<div id="averageCustomerReviews">`` and keeps the first three
    characters of its stripped text (e.g. ``"4.3"`` from
    ``"4.3 out of 5 stars"``).

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup
        document).

    Returns
    -------
    str
        The leading three characters of the rating text, or ``""`` when
        the elements are missing or carry no ``.string``.
    """
    try:
        reviews_block = soup.find("div", attrs={'id': 'averageCustomerReviews'})
        icon_label = reviews_block.find("span", attrs={'class': 'a-icon-alt'})
        return icon_label.string.strip()[:3]
    except AttributeError:
        return ""

# Function to extract total number of ratings
def product_ratings(soup):
    """Return the total number of ratings as a comma-free string.

    Reads the text of ``<span id="acrCustomerReviewText">`` and keeps only
    its first whitespace-delimited token, with enclosing parentheses and
    thousands separators removed. Taking the first token (rather than
    slicing a fixed number of trailing characters) means the count survives
    whatever word follows it: ``"1,234 ratings"`` -> ``"1234"`` and
    ``"1 rating"`` -> ``"1"``.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup
        document).

    Returns
    -------
    str
        The rating count text, or ``""`` when the element is missing, has
        no ``.string``, or its text is empty.
    """
    try:
        review_text = soup.find("span", attrs={'id': 'acrCustomerReviewText'}).string.strip()
        tokens = review_text.split()
        if not tokens:
            return ""
        # The count is the leading token; drop wrapping parentheses and commas.
        return tokens[0].strip('()').replace(',', '')
    except AttributeError:
        return ""

# Function to extract product availability
def product_availability(soup):
    """Return the availability message shown on a product page.

    Inside ``<div id="availability">`` the function tries two ``<span>``
    class combinations in order and returns the stripped text of the first
    one that exists and has a ``.string``:

    1. ``a-size-medium a-color-success``
    2. ``a-size-base a-color-price a-text-bold``

    Only ``AttributeError`` (missing element / ``.string`` is ``None``) is
    treated as "not found"; any other exception, including
    ``KeyboardInterrupt``, propagates to the caller instead of being
    silently converted into ``"Not available"``.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup
        document).

    Returns
    -------
    str
        The availability text, or ``"Not available"`` when neither span
        yields text.
    """
    candidate_classes = (
        'a-size-medium a-color-success',
        'a-size-base a-color-price a-text-bold',
    )
    for css_class in candidate_classes:
        try:
            container = soup.find("div", attrs={'id': 'availability'})
            message = container.find("span", attrs={'class': css_class})
            return message.string.strip()
        except AttributeError:
            # This variant is absent on the page; try the next one.
            continue
    return "Not available"


In [33]:
if __name__ == '__main__':
    # Crawl one Amazon.in search-results page, visit every product link found
    # on it, extract the fields with the product_* helpers, and write the rows
    # that have a title to amazon_data.csv.

    # Browser-like headers sent with every request
    HEADERS = ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # Search-results page to crawl
    URL = "https://www.amazon.in/s?k=monitor&ref=nb_sb_noss_1"

    # Seconds to wait for a response. requests applies no timeout by default,
    # so a stalled connection would otherwise block the run indefinitely.
    REQUEST_TIMEOUT = 30

    # HTTP request for the search page
    webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)

    # Soup object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch product links as a list of tag objects
    links = soup.find_all("a", attrs={'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

    # Build absolute product URLs; anchors without an href are skipped because
    # link.get('href') returns None for them and cannot be concatenated.
    links_list = []
    for link in links:
        href = link.get('href')
        if href:
            links_list.append("https://amazon.in" + href)

    # Column name -> list of values, one entry per successfully fetched product
    d = {"title":[], "price":[], "star_rating":[], "total_number_of_ratings":[], "availability":[]}

    for link in links_list:
        # Get the product webpage; a failed request skips this product only.
        # Skipping happens before any append so the column lists stay aligned.
        try:
            product_webpage = requests.get(link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print("Skipping {}: {}".format(link, exc))
            continue

        # Create soup object containing all data
        product_soup = BeautifulSoup(product_webpage.content, "html.parser")

        # Call functions to insert values into dictionary
        d['title'].append(product_title(product_soup))
        d['price'].append(product_price(product_soup))
        d['star_rating'].append(product_star_rating(product_soup))
        d['total_number_of_ratings'].append(product_ratings(product_soup))
        d['availability'].append(product_availability(product_soup))

    # Create a pandas dataframe and export the final result as csv
    df_amazon = pd.DataFrame.from_dict(d)
    # Assign the replaced column back instead of calling
    # replace(..., inplace=True) on the column selection: that chained
    # in-place form is not guaranteed to modify df_amazon (it has no effect
    # under pandas Copy-on-Write), which would leave empty titles in place.
    df_amazon['title'] = df_amazon['title'].replace('', np.nan)
    # Rows whose title could not be extracted are dropped
    df_amazon = df_amazon.dropna(subset=['title'])
    df_amazon.to_csv('amazon_data.csv', header=True, index=False)