# Script

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

In [None]:
def get_name(soup):
    try:
        name = soup.find("span", attrs = { "id":"productTitle"}).text.strip()
    except AttributeError:
        name = ""
    return name


def get_price(soup):
  try:
      price = soup.find("span", attrs = { "class":"a-price-whole" }).text.strip()
  except AttributeError:
      price = ""
  return price


def get_rating(soup):
  try:
      rating = soup.find("span", attrs = { "class":"a-icon-alt" }).text
  except AttributeError:
      rating = ""
  return rating


def get_reviews(soup):
  try:
      reviews = soup.find("span", attrs = { "id":"acrCustomerReviewText" }).text.strip()
  except AttributeError:
      reviews = ""
  return reviews


def get_description(soup):
  try:
      description = soup.find("ul", attrs = { "class":"a-unordered-list a-vertical a-spacing-mini" }).text.strip().split(',')
  except AttributeError:
    description = ""
  return description


def get_asin(soup):
    asin = re.search(r'/[dg]p/([^/]+)', soup, flags=re.IGNORECASE)
    if asin:
      return asin.group(1)
    return ""


def get_manufacturer(soup):
  try:
    manu = soup.find("table", attrs = {"id":"productDetails_techSpec_section_1"}).find('th', string=' Manufacturer ').find_next_sibling('td').text
    manu = str(BeautifulSoup(manu, 'html.parser'))
    manu = manu.strip()
    manu = manu.replace("\u200e", "" )
  except AttributeError:
      manu = ""
  return manu



if __name__ == '__main__':

  d = {"PRODUCT URL" : [], "PRODUCT NAME" : [], "PRODUCT PRICE" : [], "RATING" : [], "NUMBER OF REVIEWS" : [], "DESCRIPTION" : [], "ASIN" : [], "MANUFACTURER" : [] }
  HEADERS = ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', 'Acccept-Language': 'en-US, en; q = 0.5'})
  all_links = []
  
  for x in range(1, 21):

      URL = "https://www.amazon.in/s?k=bags&page={}&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_1".format(x)

      webpage = requests.get(URL, headers = HEADERS)

      soup = BeautifulSoup(webpage.content, "html.parser")

      links = soup.find_all("a", attrs = {'class' : 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})
      
      for link in links:
            all_links.append(link.get('href'))

  for link in all_links:
      new_webpage = requests.get("https://amazon.in" + link, headers = HEADERS)
      new_soup = BeautifulSoup(new_webpage.content, 'html.parser')

      d['PRODUCT URL'].append("https://amazon.in" + link)
      d['PRODUCT NAME'].append(get_name(new_soup))
      d['PRODUCT PRICE'].append(get_price(new_soup))
      d['RATING'].append(get_rating(new_soup))
      d['NUMBER OF REVIEWS'].append(get_reviews(new_soup))
      d['DESCRIPTION'].append(get_description(new_soup))
      d['ASIN'].append(get_asin("https://amazon.in" + link))
      d['MANUFACTURER'].append(get_manufacturer(new_soup))

  product_list = pd.DataFrame.from_dict(d)
  product_list['PRODUCT NAME'].replace('', np.nan, inplace=True)
  product_list = product_list.dropna(subset=['PRODUCT NAME'])
  product_list.to_csv("amazon_data.csv", header=True, index=False)

In [9]:
product_list

Unnamed: 0,PRODUCT URL,PRODUCT NAME,PRODUCT PRICE,RATING,NUMBER OF REVIEWS,DESCRIPTION,ASIN,MANUFACTURER
0,https://amazon.in/American-Tourister-AMT-SCH-0...,American Tourister 32 Ltrs Black Casual Backpa...,1199.,4.1 out of 5 stars,"52,287 ratings","[Laptop Compatibility: No, Strap Type: Adjust...",B07CJCGM1M,Samsonite
1,https://amazon.in/Wesley-Milestone-Waterproof-...,Wesley Milestone 2.0 Casual Waterproof Laptop ...,565.,4.3 out of 5 stars,"9,793 ratings",[30L Capacity: The Backpack has a padded lapto...,B084JGJ8PF,"Wesley, Longani Trading Company, F-82, Sector-..."
2,https://amazon.in/Lavie-Sport-Duffle-Luggage-T...,Lavie Sport Lino Large Size 63 cms Wheel Duffl...,949.,3.9 out of 5 stars,"6,296 ratings",[The Lino wheel Duffle Bag from Lavie Sport is...,B097RJ22Q3,
3,https://amazon.in/ADISA-Laptop-Backpack-Office...,ADISA 15.6 inch Laptop Backpack Office Bag Col...,499.,3.8 out of 5 stars,564 ratings,[Material: Water Resistant Light-Weight Polyes...,B09TPX22NF,
4,https://amazon.in/Skybags-Brat-Black-Casual-Ba...,Skybags Brat Black 46 Cms Casual Backpack,659.,4.1 out of 5 stars,"3,770 ratings",[Combination of functional & safety features i...,B08Z1HHHTD,
...,...,...,...,...,...,...,...,...
272,https://amazon.in/Gear-Polyester-Laptop-Backpa...,Gear ECO 1 Black Orange Red Laptop Backpack,854.,4.3 out of 5 stars,"1,357 ratings","[Outer Material: Polyester, Color: Jet Black ...",B012NW1H1I,
274,https://amazon.in/Casual-Travel-Backpack-Schoo...,Airish 45 ltrs (46 Cms)Backpack(ARU-202_Green),664,3.6 out of 5 stars,154 ratings,[Fit Type: Regular SPACIOUS AND LIGHTWEIGHT...,B09PVCF9F8,
283,https://amazon.in/F-Gear-President-Brown-Lapto...,F Gear President Brown 30 liter Laptop Backpac...,1011.,4.3 out of 5 stars,510 ratings,"[Professional, Good Looking and Huge. Larg...",B06XW7FSHF,
284,https://amazon.in/F-Gear-Talent-Laptop-Backpac...,F Gear Talent Laptop Backpack With Rain Cover ...,931,4.4 out of 5 stars,"1,103 ratings",[Water Proof and Lightweight. Dimension HxL...,B078KS2FX1,


The Manufacurer column is largely NULL, this is because of the variation of the product page format. Hardcoding various formats is a possible solution however it will take time .