In [7]:
#importing req. Libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np

In [1]:
# function to extract title
def get_title(soup):
    try:
        # get the outer object tag
        title = soup.find("span", attrs={"id":'productTitle'})
        
        #extracted the tag
        title_value = title.text
        
        #removing the white spaces
        title_string = title_value.strip()
        
    except AttributeError:
        title_string = ""
        
    return title_string

#function to extract price
def get_price(soup):
    try:
        price = soup.find("span", attrs = {'id':'priceblock_ourprice'}).string.strip()
    except AttributeError:
        try:
            #if there is some discount price
            price = soup.find("span", attrs = {'id':'priceblock_dealprice'}).string.strip()
        except:
            price = ""
    return price

  
#function to extract ratings 
def get_rating(soup):
    try:
        rating = soup.find("span", attrs = {'id':'a-icon a-icon-star a-star-4-5'}).string.strip()
    except AttributeError:
        try:
            rating = soup.find("span", attrs = {'class':'a-icon-alt'}).string.strip()
        except:
            rating = ""
    return rating

#function to get number of reviews 
def get_review_number(soup):
    #attrs={'id':'acrCustomerReviewText'}
    try:
        review_number = soup.find("span", attrs={'id':'acrCustomerReviewText'}).string.strip()
    except AttributeError:
        review_number = ''
    return review_number
    
#function to extract availibilty
def get_availibility(soup):
    try:
        availible = soup.find("div", attrs = {'id':'availability'})
        availible = availible.find("span").strip.string()
    except AttributeError:
        availible = "Not Availible"
    
    return availible
            

In [14]:
if __name__ == '__main__':
    
    #adding user agent 
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36','Accept-Language':'en-US, en:q=0.5'})
    
    #adding the URL
    URL = "https://www.amazon.com/s?k=playstation+4&ref=nb_sb_noss_2"
    
    #HTTP request
    webpage = requests.get(URL, headers=HEADERS)
    
    #Creating a soup object that have all the data of the webpage
    soup = BeautifulSoup(webpage.content, "html.parser")
    
    #Fetch Links as a list of Tag Objects
    links = soup.find_all("a", attrs = {'class':'a-link-normal s-no-outline'})
    
    #store the links 
    links_list = []
    
    #loop for extracting links from the webpage
    for link in links:
        links_list.append(link.get('href'))
        
    d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}
    
    #Loop for extracting product_details from each link
    for link in links_list:
        new_webpage = requests.get("https://www.amazon.com" + link, headers = HEADERS)
        
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")
        
        #function to call to add data
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_number(new_soup))
        d['availability'].append(get_availibility(new_soup))
    
    #creating a dataFrame from Dictionary Pandas as pd
    amazon_df = pd.DataFrame.from_dict(d)
    
    #replace the empty titles with nan
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)
    
    #droping the NA columns
    amazon_df = amazon_df.dropna(subset = ['title'])
    
    #creating a csv with Scraped Data
    amazon_df.to_csv("Amazon_Scraping.csv", header = True, index = False)


In [15]:
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
1,Sony Playstation PS4 1TB Black Console,,4.6 out of 5 stars,"1,453 ratings",Not Availible
2,PlayStation 4 500GB Console [Old Model][Discon...,,4.6 out of 5 stars,"13,582 ratings",Not Availible
3,PlayStation®5 Digital Edition (slim),,4.7 out of 5 stars,"4,912 ratings",Not Availible
4,PlayStation 4 Slim 500GB Console [Discontinued],,4.5 out of 5 stars,"1,893 ratings",Not Availible
5,PlayStation 4 DualShock 4 Bundle [Discontinued],,4.3 out of 5 stars,510 ratings,Not Availible
6,OIVO PS4 Stand Cooling Fan Station for Playsta...,,4.5 out of 5 stars,"45,395 ratings",Not Availible
7,PlayStation 4 Slim 1TB Console - Marvel's Spid...,,4.7 out of 5 stars,"2,818 ratings",Not Availible
8,"PS4 Controller Charger Dock Station, OIVO 1.8H...",,4.7 out of 5 stars,"37,906 ratings",Not Availible
9,DualShock 4 Wireless Controller for PlayStatio...,,3.9 out of 5 stars,"1,703 ratings",Not Availible
10,PowerA USB Charging Cable for PlayStation 4,,4.6 out of 5 stars,"29,332 ratings",Not Availible
