In [4]:
# Importing the Required Modules

from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv

In [5]:
# !pip install python-dotenv

In [2]:
# Function to extract product Title
def get_title(soup):
	"""Return the product title found in a parsed product page.

	Looks up the ``<span id="productTitle">`` element via ``soup.find`` and
	returns its text with surrounding whitespace stripped.

	Args:
		soup: Parsed page object exposing a BeautifulSoup-style ``find``.

	Returns:
		The stripped title string, or ``""`` when the lookup raises
		``AttributeError`` (for example because ``find`` returned ``None``).
	"""
	title_selector = {"id": "productTitle"}
	try:
		return soup.find("span", attrs=title_selector).text.strip()
	except AttributeError:
		# No usable title element: report an empty title rather than failing.
		return ""

# Function to extract Product Price
def get_price(soup):
	"""Return the product price found in a parsed product page.

	Two lookups are attempted in order:

	1. the text of the ``<span class="a-offscreen">`` element;
	2. if that raises ``AttributeError``, the ``.string`` of the
	   ``<span id="priceblock_dealprice">`` element (deal price).

	Args:
		soup: Parsed page object exposing a BeautifulSoup-style ``find``.

	Returns:
		The stripped price string, or ``""`` when neither lookup yields a
		value (both raise ``AttributeError``).

	Only ``AttributeError`` is treated as "price not found". Any other
	exception (including ``KeyboardInterrupt``) propagates to the caller
	instead of being silently reported as an empty price.
	"""
	try:
		# get the regular product price as a string
		price = soup.find("span", attrs={"class": "a-offscreen"}).text.strip()
	except AttributeError:
		try:
			# If there is some deal price
			price = soup.find("span", attrs={'id':'priceblock_dealprice'}).string.strip()
		except AttributeError:
			# Covers both a missing element (find() gave None) and an element
			# whose .string is None.
			price = ""
	return price

# Function to extract product rating
def get_rating(soup):
	"""Return the product rating text found in a parsed product page.

	Reads the text of the ``<span class="a-icon-alt">`` element returned by
	``soup.find`` and strips surrounding whitespace.

	Args:
		soup: Parsed page object exposing a BeautifulSoup-style ``find``.

	Returns:
		The stripped rating string, or ``""`` when the lookup raises
		``AttributeError`` (for example because ``find`` returned ``None``).
	"""
	rating_selector = {"class": "a-icon-alt"}
	try:
		return soup.find("span", attrs=rating_selector).text.strip()
	except AttributeError:
		# No rating element available on this page.
		return ""

# Function to get the number of product reviews by amazon users
def get_review_count(soup):
	"""Return the review-count text found in a parsed product page.

	Reads the text of the ``<span id="acrCustomerReviewText">`` element
	returned by ``soup.find`` and strips surrounding whitespace. The value is
	returned as the raw string, not converted to a number.

	Args:
		soup: Parsed page object exposing a BeautifulSoup-style ``find``.

	Returns:
		The stripped review-count string, or ``""`` when the lookup raises
		``AttributeError`` (for example because ``find`` returned ``None``).
	"""
	reviews_selector = {'id': 'acrCustomerReviewText'}
	try:
		return soup.find("span", attrs=reviews_selector).text.strip()
	except AttributeError:
		# No review-count element available on this page.
		return ""

# Function to extract the availability status of a product
def get_availability(soup):
	"""Return the availability text found in a parsed product page.

	Locates the ``<div id="availability">`` element, then the ``<span>``
	inside it, and returns that span's text with surrounding whitespace
	stripped.

	Args:
		soup: Parsed page object exposing a BeautifulSoup-style ``find``.

	Returns:
		The stripped availability string (which may be empty if the span has
		no text), or ``"Not in Stock"`` when either lookup raises
		``AttributeError`` (for example because the div or span is missing).
	"""
	try:
		container = soup.find("div", attrs={'id': 'availability'})
		return container.find("span").text.strip()
	except AttributeError:
		# Missing div or span is reported as out of stock.
		return "Not in Stock"

In [20]:
if __name__ == '__main__':
	# Scrape one Amazon search-results page: collect the product links it
	# contains, fetch every product page, extract title / price / rating /
	# review count / availability, and save the rows that have a title to
	# amazon_data.csv. The resulting DataFrame is left in `amazon_data`.

	# Load variables from a .env file (if one exists) into the process
	# environment so USER_AGENT can be read below. Variables that are already
	# set in the environment are not overridden.
	load_dotenv()

	# The Amazon URL to check
	url = "https://www.amazon.com/s?k=laptop&crid=3S0WZLZ9Y84JK&sprefix=laptop%2Caps%2C592&ref=nb_sb_noss_1"

	# Seconds to wait for each HTTP response; without a timeout requests.get()
	# can block indefinitely on an unresponsive server.
	request_timeout = 30

	# read my credentials (raises KeyError if USER_AGENT is not set)
	user_agent = os.environ['USER_AGENT']

	# my header ("en;q=0.5" is the generic-English fallback language range)
	header = {'user-agent': user_agent, 'Accept-Language': 'en-US, en;q=0.5'}

	# HTTP request to the amazon website
	amazon = requests.get(url, headers=header, timeout=request_timeout).text

	# soup object containing all HTML data
	soup = BeautifulSoup(amazon, "html.parser")

	# Fetch links from the HTML anchor tags
	links = soup.find_all("a", attrs={'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

	# Store the link targets in a list, skipping anchors without an href
	# (link.get('href') returns None for those, which cannot be concatenated
	# to the site prefix below).
	links_list = [href for href in (link.get('href') for link in links) if href]

	# create a dictionary for the data (one list per output column)
	cols = {"title":[], "price":[], "rating":[], "reviews":[], "availability":[]}

	# extract product detail from all the links in the links_list
	for link in links_list:
		try:
			amazon_page = requests.get("https://www.amazon.com" + link, headers=header, timeout=request_timeout)
		except requests.RequestException as exc:
			# One unreachable product page should not discard the rows
			# already collected; report it and move on.
			print(f"Skipping {link}: {exc}")
			continue
		sub_soup = BeautifulSoup(amazon_page.content, "html.parser")

		# Function calls to collect all necessary product information
		cols['title'].append(get_title(sub_soup))
		cols['price'].append(get_price(sub_soup))
		cols['rating'].append(get_rating(sub_soup))
		cols['reviews'].append(get_review_count(sub_soup))
		cols['availability'].append(get_availability(sub_soup))

	amazon_data = pd.DataFrame.from_dict(cols)
	# Turn empty titles into NaN so dropna() can remove those rows. Assign the
	# result back: an in-place replace() on the selected column is a chained
	# assignment and does not update the frame under pandas Copy-on-Write.
	amazon_data['title'] = amazon_data['title'].replace('', np.nan)
	amazon_data = amazon_data.dropna(subset=['title'])
	amazon_data.to_csv("amazon_data.csv", header=True, index=True)

In [21]:
# Display the scraped product table (title, price, rating, reviews,
# availability) as this cell's output.
amazon_data

Unnamed: 0,title,price,rating,reviews,availability
0,"Lenovo Ideapad 15.6"" HD Laptop, Athlon Silver ...",$399.00,4.8 out of 5 stars,10 ratings,In Stock
1,"Acer 2023 Newest Aspire 5 15.6"" FHD IPS Slim L...",$419.00,4.4 out of 5 stars,196 ratings,In Stock
2,"Lenovo 2022 Newest Ideapad 3 Laptop, 15.6"" HD ...",$379.89,4.4 out of 5 stars,"1,976 ratings",In Stock
3,"SGIN Laptop 15.6 Inch, 4GB DDR4 128GB SSD Wind...",$259.99,4.5 out of 5 stars,"1,377 ratings",In Stock
4,"Acer Aspire 5 A515-45-R74Z Slim Laptop | 15.6""...",$389.99,4.5 out of 5 stars,"39,502 ratings",In Stock
5,"Lenovo IdeaPad 1i Laptop 14"" HD Display, Intel...",$219.00,5.0 out of 5 stars,7 ratings,In Stock
6,"MSI Katana GF66 15.6"" 144Hz FHD Gaming Laptop:...",$899.00,4.5 out of 5 stars,"1,596 ratings",
7,"Microsoft Surface Laptop Go 12.4"" Touchscreen ...",$265.00,4.2 out of 5 stars,57 ratings,Only 9 left in stock - order soon
8,"Acer Aspire 5 A515-45-R74Z Slim Laptop | 15.6""...",$389.99,4.5 out of 5 stars,"39,502 ratings",In Stock
9,"ASUS Chromebook CX1, 15.6"" Full HD NanoEdge Di...",$209.99,4.1 out of 5 stars,273 ratings,In Stock
