In [1]:
# import necessary modules
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Important functions 

In [2]:
def get_title(dom):
    """Return the product title from a parsed product page.

    Args:
        dom: Parsed page object exposing ``find(name, attrs=...)``
            (a BeautifulSoup document in this notebook).

    Returns:
        The whitespace-stripped first child of the ``span`` whose ``id`` is
        ``productTitle``, or ``''`` when that element is missing, has no
        children, or its first child cannot be stripped as text.
    """
    try:
        title = dom.find('span', attrs={"id": "productTitle"}).contents[0].strip()
    # Catch only the "element missing / unexpected shape" failures. A bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit, making
    # a long scrape impossible to interrupt cleanly.
    except (AttributeError, IndexError, TypeError):
        title = ''

    return title


def get_price(dom):
    """Return the whole-number part of the product price as displayed text.

    Args:
        dom: Parsed page object exposing ``find(name, attrs=...)``
            (a BeautifulSoup document in this notebook).

    Returns:
        The whitespace-stripped first child of the first ``span`` with class
        ``a-price-whole``, or ``''`` when that element is missing, has no
        children, or its first child cannot be stripped as text.
    """
    try:
        price = dom.find('span', attrs={"class": "a-price-whole"}).contents[0].strip()
    # Catch only the "element missing / unexpected shape" failures; a bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, IndexError, TypeError):
        price = ''

    return price

def get_ratings(dom):
    """Return the product rating text with a trailing ``' stars'`` suffix.

    Args:
        dom: Parsed page object exposing ``find(name, attrs=...)``
            (a BeautifulSoup document in this notebook).

    Returns:
        The whitespace-stripped first child of the first ``span`` with class
        ``a-size-medium a-color-base`` followed by ``' stars'``, or ``''``
        when that element is missing, has no children, or its first child
        cannot be stripped as text.
    """
    try:
        ratings = dom.find('span', attrs={"class": "a-size-medium a-color-base"}).contents[0].strip() + ' stars'
    # Catch only the "element missing / unexpected shape" failures; a bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, IndexError, TypeError):
        ratings = ''

    return ratings

def get_no_of_reviews(dom):
    """Return the review-count text shown on a product page.

    Args:
        dom: Parsed page object exposing ``find(name, attrs=...)``
            (a BeautifulSoup document in this notebook).

    Returns:
        The whitespace-stripped first child of the ``span`` whose ``id`` is
        ``acrCustomerReviewText``, or ``''`` when that element is missing,
        has no children, or its first child cannot be stripped as text.
    """
    try:
        reviews = dom.find('span', attrs={"id": "acrCustomerReviewText"}).contents[0].strip()
    # Catch only the "element missing / unexpected shape" failures; a bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, IndexError, TypeError):
        reviews = ''

    return reviews

def get_availability(dom):
    """Return the availability text shown on a product page.

    Args:
        dom: Parsed page object exposing ``find(name, attrs=...)``
            (a BeautifulSoup document in this notebook).

    Returns:
        The whitespace-stripped first child of the first ``span`` with class
        ``a-size-medium a-color-success``, or ``''`` when that element is
        missing, has no children, or its first child cannot be stripped as
        text.
    """
    try:
        available = dom.find('span', attrs={"class": "a-size-medium a-color-success"}).contents[0].strip()
    # Catch only the "element missing / unexpected shape" failures; a bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, IndexError, TypeError):
        available = ''

    return available

# Main Code to extract data  

In [3]:
# Request parameters: the search-results page to scrape, plus browser-like
# headers sent with every request.
URl = "https://www.amazon.in/s?k=playstation+5&crid=187H30T6PFLVZ&sprefix=playstation+%2Caps%2C198&ref=nb_sb_ss_ts-doa-p_1_12"
Headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0',
    'Accept-Language': 'en-US,en;q=0.5',
}

In [4]:
# HTTP GET request for the search-results page.
# requests applies no timeout by default, so a stalled connection would hang
# the kernel indefinitely; 30 seconds bounds the wait.
resp_obj = requests.get(URl, headers=Headers, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
# and ending up with an empty list of product links.
resp_obj.raise_for_status()

In [5]:
# Inspect the type of the raw response body (`content`); it is bytes.
type(resp_obj.content)

bytes

In [6]:
# Inspect the type of the decoded response body (`text`); it is str, and it is
# this attribute that gets handed to BeautifulSoup.
type(resp_obj.text)

str

In [7]:
# Parse the search-results HTML into a BeautifulSoup document using the
# 'html.parser' backend.
soup = BeautifulSoup(markup=resp_obj.text, features='html.parser')

In [8]:
# Collect every <a> tag on the results page whose class attribute matches the
# string below.
links = soup.find_all(
    'a',
    class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal",
)

In [9]:
# Map each output column to the helper that extracts it from a product page,
# so the column list and the extraction calls cannot drift apart.
field_extractors = {
    'Product_title': get_title,
    'Price(Rs)': get_price,
    'Rating_stars': get_ratings,
    'No_of_review': get_no_of_reviews,
    'Available': get_availability,
}
data = {column: [] for column in field_extractors}

for link in links:
    half_url = link.get('href')
    if not half_url:
        # Anchor without a target: there is no product page to fetch.
        continue
    # urljoin resolves a relative href against the search URL (no doubled
    # slash, same host as the search request) and leaves an already-absolute
    # href untouched, where plain concatenation would build a broken URL.
    new_url = urljoin(URl, half_url)
    # A separate name keeps the search-page response in `resp_obj` intact.
    # The timeout stops one stalled product page from blocking the whole run.
    product_resp = requests.get(new_url, headers=Headers, timeout=30)
    dom = BeautifulSoup(product_resp.text, 'html.parser')
    # Every extractor returns '' on a miss, so all columns stay equal length.
    for column, extract in field_extractors.items():
        data[column].append(extract(dom))
    


In [10]:
# Build the results table: one column per key of `data`, one row per product.
df = pd.DataFrame(data=data)


In [11]:
# Turn the '' placeholders left by the extractors into missing values (NaN).
# numpy comes from the import cell at the top of the notebook. The frame is
# reassigned rather than mutated with inplace=True.
df = df.replace('', np.nan)

In [12]:
# Display the scraped table as the cell's output.
df

Unnamed: 0,Product_title,Price(Rs),Rating_stars,No_of_review,Available
0,God Of War Ragnarok | Launch Edition | PS5 Gam...,4999.0,4.6 out of 5 stars,110 ratings,In stock.
1,Sonic Frontiers | Standard Edition | PlayStati...,3499.0,5 out of 5 stars,1 rating,In stock.
2,PS5 Grand Theft Auto V,2099.0,4.4 out of 5 stars,104 ratings,In stock.
3,PlayStation 5 – The Best Hidden Features and E...,221.0,2 out of 5 stars,1 rating,
4,Need For Speed Unbound | Standard Edition | Pl...,4438.0,4.1 out of 5 stars,2 ratings,In stock.
5,PS5 WWE 2K22,1999.0,4.2 out of 5 stars,58 ratings,In stock.
6,Horizon Forbidden West | Standard Edition | PS...,2683.0,4.6 out of 5 stars,210 ratings,In stock.
7,Sony DualSense Wireless Controller for PlaySta...,5098.0,4.4 out of 5 stars,"1,305 ratings",In stock.
8,Gran Turismo 7 | Standard Edition | PS5 Game (...,2697.0,4.1 out of 5 stars,122 ratings,In stock.
9,CRASOME Dust Cover for PS5 Lining Dust Guard f...,298.0,4.5 out of 5 stars,2 ratings,In stock.


Save the DataFrame in CSV format

In [13]:
# Write the table to disk with a header row and without the index column.
df.to_csv(
    path_or_buf='./Scraped_data_from_amazon.csv',
    index=False,
    header=True,
)