In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time

In [2]:
def get_title(soup):
    title = soup.find("span", attrs={"id": "productTitle"})
    return title.text.strip() if title else None

def get_price(soup):
    price = soup.find("span", attrs={"class": "a-price-whole"})
    if price:
        return price.text.strip() + (soup.find("span", attrs={"class": "a-price-symbol"}).text.strip() if soup.find("span", attrs={"class": "a-price-symbol"}) else '')
    return None

def get_rating(soup):
    rating = soup.find("span", attrs={"class": "a-icon-alt"})
    return rating.text.strip() if rating else None

def get_review_count(soup):
    reviews = soup.find("span", attrs={"id": "acrCustomerReviewText"})
    return reviews.text.strip() if reviews else None

In [3]:
if __name__ == '__main__':
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    URL = "https://www.amazon.com/s?k=nintendo+games"

    try:
        webpage = requests.get(URL, headers=HEADERS)
        webpage.raise_for_status()
        soup = BeautifulSoup(webpage.content, "html.parser")

        links = soup.find_all("a", attrs={'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})
        links_list = ["https://www.amazon.com" + link.get('href') for link in links]

        product_data = {"title": [], "price": [], "rating": [], "reviews": []}

        for link in links_list:
            try:
                new_webpage = requests.get(link, headers=HEADERS)
                new_webpage.raise_for_status()
                new_soup = BeautifulSoup(new_webpage.content, "html.parser")

                product_data['title'].append(get_title(new_soup))
                product_data['price'].append(get_price(new_soup))
                product_data['rating'].append(get_rating(new_soup))
                product_data['reviews'].append(get_review_count(new_soup))

                time.sleep(2)
                '''time.sleep(2)ป้องกันไม่ให้เข้าถึงเซิร์ฟเวอร์หรือหลีกเลี่ยงไม่ให้เซิร์ฟเวอร์รับภาระมากเกินไป'''
            except requests.exceptions.RequestException as e:
                print(f"Error fetching product page: {e}")

        amazon_df = pd.DataFrame.from_dict(product_data)
        amazon_df['title'].replace('', np.nan, inplace=True)
        amazon_df = amazon_df.dropna(subset=['title'])

        print(amazon_df)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching main URL: {e}")

Error fetching product page: HTTPSConnectionPool(host='www.amazon.comhttps', port=443): Max retries exceeded with url: /aax-us-iad.amazon.com/x/c/JJZZW1PJja9SJhe7wd1ziEsAAAGSHUGJtwEAAAH2AQBvbm9fdHhuX2JpZDMgICBvbm9fdHhuX2ltcDEgICDCqBsk/https://www.amazon.com/Gammeefy-Controller-Replacement-Controler%EF%BC%8CSwitch-Controllers/dp/B09QPWT86S/ref=sxbs_sbv_search_btf?content-id=amzn1.sym.2f0a8989-0b67-47e7-b61e-9e3ef9908602%3Aamzn1.sym.2f0a8989-0b67-47e7-b61e-9e3ef9908602&cv_ct_cx=nintendo+games&dib=eyJ2IjoiMSJ9.p-bLC4bZRAiYqLVZoWIudQ.Rh0C7lx8fwx6rq0fe9YfqFiOYWmjySPBcjZX6xOZoV4&dib_tag=se&keywords=nintendo+games&pd_rd_i=B09QPWT86S&pd_rd_r=cf8002cb-81a2-4224-9fac-1ca015a962d9&pd_rd_w=x9fsz&pd_rd_wg=ZWb1L&pf_rd_p=2f0a8989-0b67-47e7-b61e-9e3ef9908602&pf_rd_r=712A19MHMDDKXDZ5X99K&qid=1727067687&sbo=RZvfv%2F%2FHxDF%2BO5021pAnSA%3D%3D&sr=1-1-a61ee601-6e56-4862-a8a2-1d3da5a5406f (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000001D284E54590>: Failed to resolve 'ww

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  amazon_df['title'].replace('', np.nan, inplace=True)


In [4]:
amazon_df

Unnamed: 0,title,price,rating,reviews
0,Minecraft - Nintendo Switch,29.$,4.8 out of 5 stars,"43,859 ratings"
1,Mario Party Superstars - US Version,51.$,4.8 out of 5 stars,"15,224 ratings"
2,Super Mario Bros.™ Wonder - Nintendo Switch (U...,53.$,4.9 out of 5 stars,"8,278 ratings"
3,LEGO Super Mario Donkey Kong’s Tree House Expa...,47.$,4.8 out of 5 stars,701 ratings
4,PowerA Everywhere Messenger Bag for Nintendo S...,9.$,4.7 out of 5 stars,"3,656 ratings"
5,Game Traveler Mario Kart Nintendo Switch Case ...,24.$,4.8 out of 5 stars,"3,845 ratings"
6,Nintendo Switch Case,19.$,4.8 out of 5 stars,680 ratings
7,PDP Messenger Case with Removable Shoulder Str...,43.$,4.7 out of 5 stars,451 ratings
8,PDP Messenger Case with Removable Shoulder Str...,359.$,4.5 out of 5 stars,273 ratings
9,Super Smash Bros. Ultimate - US Version,47.$,4.8 out of 5 stars,"70,975 ratings"


In [5]:
# Save to CSV 
amazon_df.to_csv("amazon_data.csv", header=True, index=False)