In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [4]:
# Function to extract product title
def get_title(soup) -> str:
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element and returns its text
    with leading and trailing whitespace removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped title text, or ``""`` when the element is missing.
    """
    try:
        title = soup.find("span", attrs={'id': 'productTitle'})
        return title.text.strip()
    except AttributeError:
        # find() returns None when the element is absent, so ``.text`` fails.
        # Only that case is treated as "no title"; KeyboardInterrupt and
        # other unrelated errors are no longer silently swallowed.
        return ""

# Function to extract product price
def get_price(soup) -> str:
    """Return the displayed price text from a parsed product page.

    Finds the ``<div id="corePriceDisplay_desktop_feature_div">`` container,
    then the first ``<span class="a-offscreen">`` inside it, and returns that
    span's text unchanged (it is not stripped).

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The price text, or ``""`` when either element is missing.
    """
    try:
        price_box = soup.find("div", attrs={"id": "corePriceDisplay_desktop_feature_div"})
        price_span = price_box.find("span", attrs={"class": "a-offscreen"})
        return price_span.text
    except AttributeError:
        # Either lookup returned None (element absent). Unrelated errors such
        # as KeyboardInterrupt are deliberately allowed to propagate.
        return ""

# Function to extract product rating
def get_rating(soup) -> str:
    """Return the rating text from a parsed product page.

    Uses the first ``<span class="a-icon-alt">`` element on the page and
    returns its text unchanged (it is not stripped).

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The rating text, or ``""`` when no such element is found.
    """
    try:
        rating_span = soup.find("span", attrs={"class": "a-icon-alt"})
        return rating_span.text
    except AttributeError:
        # find() returned None (element absent). Unrelated errors such as
        # KeyboardInterrupt are deliberately allowed to propagate.
        return ""

# Function to extract the product review count
def get_review_count(soup) -> str:
    """Return the review-count text from a parsed product page.

    Reads the ``<a id="acrCustomerReviewLink">`` element and returns its text
    with leading and trailing whitespace removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped link text, or ``""`` when the element is missing.
    """
    try:
        review_link = soup.find("a", attrs={"id": "acrCustomerReviewLink"})
        return review_link.text.strip()
    except AttributeError:
        # find() returned None (element absent). Unrelated errors such as
        # KeyboardInterrupt are deliberately allowed to propagate.
        return ""

# Function to check availability
def get_availability(soup) -> str:
    """Return the availability text from a parsed product page.

    Reads the ``<div id="availability">`` element and strips its text.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped availability text; ``"In Stock"`` when the element is
        present but its text is blank; ``"Not Available"`` when the element
        is missing.
    """
    try:
        availability = soup.find("div", attrs={'id': 'availability'}).text.strip()
    except AttributeError:
        # find() returned None (element absent). Unrelated errors such as
        # KeyboardInterrupt are deliberately allowed to propagate.
        return "Not Available"

    # An availability element with no text is reported as in stock.
    if availability == "":
        availability = "In Stock"
    return availability
        

In [8]:
# Search-results page to scrape, and the browser-like headers sent with every request.
URL = "https://www.amazon.com/s?k=xbox+series+x&ref=nb_sb_ss_ts-doa-p_3_4"
HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'Accept-Language':'en-US, en;q=0.5'})
# Seconds to wait per request; without a timeout requests.get can block indefinitely.
REQUEST_TIMEOUT = 10

webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
soup = BeautifulSoup(webpage.content, "html.parser")
links = soup.find_all("a",attrs = {'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

# Collect the product-page paths. Anchors without an href yield None, which
# cannot be concatenated onto the base URL below, so they are skipped.
links_list = [href for href in (anchor.get('href') for anchor in links) if href]

# One list per output column; each fetched product page appends one value to every list.
d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}

for link in links_list:
    try:
        new_webpage = requests.get("https://www.amazon.com"+link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Best effort: skip products whose page cannot be fetched (connection
        # error, timeout, ...), but let KeyboardInterrupt stop the loop.
        continue
    new_soup = BeautifulSoup(new_webpage.content, "html.parser")

    d['title'].append(get_title(new_soup))
    d['price'].append(get_price(new_soup))
    d['rating'].append(get_rating(new_soup))
    d['reviews'].append(get_review_count(new_soup))
    d['availability'].append(get_availability(new_soup))

amazon_df = pd.DataFrame.from_dict(d)
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: under pandas Copy-on-Write the chained in-place call does
# not modify amazon_df, so rows with blank titles would never be dropped.
amazon_df['title'] = amazon_df['title'].replace('', np.nan)
amazon_df = amazon_df.dropna(subset=['title'])
amazon_df.to_csv("amazon_data.csv", header=True, index=False)
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
0,Xbox Series X,,4.8 out of 5 stars,"21,871 ratings",In Stock
1,Xbox Series X & $100 Gift Card [Digital Code],,,,Not Available
2,Star Wars Jedi: Survivor - Xbox Series X,$69.75,,61 ratings,Not Available
3,Star Wars Jedi: Survivor Standard - Xbox Serie...,$69.99,,15 ratings,Available now
4,Hogwarts Legacy Deluxe Edition - Xbox Series X,,,242 ratings,Not Available
5,Diablo IV - Xbox Series X,$69.99,,,"This item will be released on June 6, 2023. ..."
6,"Xbox Wireless Headset – Xbox Series X|S, Xbox ...",,,"7,217 ratings",Only 1 left in stock - order soon
7,Hyperkin Xenon Wired Controller (White) For Xb...,$49.99,,,"This item will be released on June 6, 2023. ..."
8,Xbox Wireless Controller – Stellar Shift Speci...,,,260 ratings,Only 5 left in stock - order soon
9,Seagate Storage Expansion Card 2TB Solid State...,$279.99,,"65,365 ratings",Usually ships within 1 to 2 months
