### IMPORTING REQUIRED LIBRARIES

In [61]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Headers with a User-Agent to mimic a browser request.

In [3]:
# Amazon search-results URL; the search term is the `k=laptop` query parameter.
url = "https://www.amazon.com/s?k=laptop&crid=3N8TJKYD720OO&sprefix=la%2Caps%2C910&ref=nb_sb_noss_2"

In [4]:
# Request headers passed to every requests.get call in this notebook.
Headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/142.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US, en;q=0.5",
}

In [5]:
# Fetch the search-results page. The explicit timeout (seconds) keeps the cell
# from blocking indefinitely on a stalled connection.
webpage = requests.get(url, headers=Headers, timeout=30)

In [6]:
webpage

<Response [200]>

In [7]:
#webpage.content

In [8]:
type(webpage.content)

bytes

### bs4 for parsing HTML.

In [9]:
# Parse the raw response bytes of the search page with the "html.parser" backend.
soup = BeautifulSoup(webpage.content, "html.parser")

In [10]:
#soup

In [11]:
# CSS selector: every <a> element carrying both the "a-link-normal" and
# "s-line-clamp-2" classes (presumably the product-title links — verify against the page markup).
links = soup.select("a.a-link-normal.s-line-clamp-2")

In [41]:
len(links)

22

In [13]:
# href of the first matched link; raises IndexError if the selector matched nothing.
link = links[0].get('href')

In [14]:
# Absolute URL of that single product (one URL, despite "list" in the name);
# the href is a site-relative path, so the host is prepended.
product_list = "http://amazon.com"+link

In [15]:
product_list

'http://amazon.com/sspa/click?ie=UTF8&spc=MTozMTMyNjMxNTI1NDk3OTc1OjE3NjQyMTY0MjA6c3BfYXRmOjMwMDY2MTI0Nzk5MTYwMjo6MDo6&url=%2FNIMO-laptop-15.6-student-computer%2Fdp%2FB0D44YZQBM%2Fref%3Dsr_1_1_sspa%3Fcrid%3D3N8TJKYD720OO%26dib%3DeyJ2IjoiMSJ9.8h14joHyf6QLrsy0A_ITBSpPZ4QQoGMSK6s7lsYlcrjS7DVUutGAq8dim94HAhzHEHhMqUfVmEV3BlLsKL8ao1X9CR3kBvVUMTPvKjY4WGSVV8r3GbJDZh5vM5I_mZ0RTHUItTlHL5kjMArhDVsnIVQWC0W90fNLMc_11_mvNpSLmNq3668U0Tgegqz3eygb365b1WlnXBra0Njkvbfoj4QExRe3ywWDGpSw2c2nVPw.mx7yBMvhZeogF-UtPS0QTGovr4x-I4zpZiljVwMBIS4%26dib_tag%3Dse%26keywords%3Dlaptop%26qid%3D1764216420%26sprefix%3Dla%252Caps%252C910%26sr%3D8-1-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGY%26psc%3D1'

In [16]:
# Fetch the product page. The explicit timeout (seconds) keeps the cell from
# blocking indefinitely on a stalled connection.
new_webpage = requests.get(product_list, headers=Headers, timeout=30)

In [17]:
new_webpage

<Response [200]>

In [18]:
# Parse the raw response bytes of the product page with the "html.parser" backend.
new_soup = BeautifulSoup(new_webpage.content, "html.parser")

In [19]:
#new_soup

In [20]:
# Product title text with surrounding whitespace removed; raises AttributeError
# if the page has no <span id="productTitle">.
new_soup.find("span", attrs={"id":'productTitle'}).text.strip()

'NIMO 15.6 IPS FHD-Laptop, 16GB RAM 1TB SSD Intel Pentium Quad Core N100, Computer with Backlit Keyboard Fingerprint (Beat to i3-1115G4 Up to 3.4GHz) Laptops for Student, Win 11 Rose Gold'

In [21]:
# First <span> with class "a-offscreen" on the product page, or None when there is none
# (presumably the price element — verify against the page markup).
price_tag = new_soup.find("span", class_="a-offscreen")

In [22]:
price_tag

In [23]:
# Use the tag's stripped text as the price, or "NA" when no tag was found.
if price_tag:
    price = price_tag.text.strip()
else:
    price = "NA"

In [24]:
price

'NA'

In [28]:
# Guard against a page without a rating element (find() returns None then),
# using the same "NA" fallback as the price lookup.
rating_tag = new_soup.find("span", attrs={"class": 'a-icon-alt'})
rating = rating_tag.text if rating_tag is not None else "NA"

In [29]:
rating

'4.5 out of 5 stars'

In [30]:
# First <span> on the product page whose own string contains "sponsored" (case-insensitive).
# NOTE(review): this inspects the product page, not the search result that was
# clicked — confirm it is the intended ad signal.
ad_tag = new_soup.find("span", string=lambda x: x and "sponsored" in x.lower())

In [31]:
# Label the product "ad" when a sponsored marker was found, otherwise "organic".
if ad_tag:
    ad_or_organic = "ad"
else:
    ad_or_organic = "organic"

In [32]:
ad_or_organic

'ad'

In [42]:
# First <img> with class "s-image" on the search-results page (note: `soup`, not `new_soup`).
img_tag = soup.find("img", class_="s-image")

In [45]:
# Take the image URL from the tag's src attribute, or "NA" when no tag was found.
if img_tag:
    img = img_tag.get("src")
else:
    img = "NA"

In [46]:
img

'https://m.media-amazon.com/images/I/71VvzfO4neL._AC_UY218_.jpg'

### FUNCTION FOR PARSING HTML AND STORING THE DATA IN A LIST

In [50]:
def scrape_amazon_products(l):
    """Fetch every product page linked from ``l`` and collect its details.

    Parameters
    ----------
    l : iterable of bs4 tags
        Anchor elements taken from the search-results page. Each one's
        ``href`` is appended to ``http://amazon.com`` to build the product URL.

    Returns
    -------
    list of dict
        One dict per anchor, in input order, with the keys ``Title``,
        ``Price``, ``Rating``, ``Ad/Organic`` and ``Image``. When the anchor
        has no href, the request fails, or the page has no ``productTitle``
        element, every field of that row is ``"NA"``. A missing price, rating
        or image is reported as ``"NA"`` for that field only.

    Notes
    -----
    Every request is sent with the module-level ``Headers`` dict.
    """
    na_row = {
        "Title": "NA",
        "Price": "NA",
        "Rating": "NA",
        "Ad/Organic": "NA",
        "Image": "NA",
    }
    data = []
    for anchor in l:
        link = anchor.get('href')
        if not link:
            # Nothing to follow; previously a missing href raised TypeError
            # and aborted the whole run.
            data.append(dict(na_row))
            continue

        product_url = "http://amazon.com" + link
        try:
            # The timeout (seconds) keeps one stalled connection from hanging
            # the loop; a network error costs only this row.
            new_webpage = requests.get(product_url, headers=Headers, timeout=30)
        except requests.RequestException:
            data.append(dict(na_row))
            continue
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        title_tag = new_soup.find("span", attrs={"id": 'productTitle'})
        if title_tag is None:
            # No title element: keep the all-"NA" row this function has
            # always produced for such pages.
            data.append(dict(na_row))
            continue
        title = title_tag.text.strip()

        price_tag = new_soup.find("span", class_="a-offscreen")
        price = price_tag.text.strip() if price_tag is not None else "NA"

        rating_tag = new_soup.find("span", attrs={"class": 'a-icon-alt'})
        rating = rating_tag.text if rating_tag is not None else "NA"

        # NOTE(review): this looks for "sponsored" anywhere on the product
        # page, not on the search result that was clicked — confirm this is
        # the intended ad signal.
        ad_tag = new_soup.find("span", string=lambda x: x and "sponsored" in x.lower())
        ad_or_organic = "ad" if ad_tag else "organic"

        # Look the thumbnail up from the anchor's nearest ancestor that
        # contains an img.s-image. The earlier global soup.find(...) returned
        # the same first image of the search page for every product.
        img_tag = None
        for ancestor in getattr(anchor, "parents", ()):
            img_tag = ancestor.find("img", class_="s-image")
            if img_tag is not None:
                break
        img = img_tag.get("src") if img_tag is not None else "NA"

        data.append({
            "Title": title,
            "Price": price,
            "Rating": rating,
            "Ad/Organic": ad_or_organic,
            "Image": img,
        })
    return data


### CONVERTING THE LIST TO A DATA FRAME WITH TIMESTAMP

In [None]:
# Scrape every link found on the search page; this issues one HTTP request per
# link, so the cell can take a while.
data1 = scrape_amazon_products(links)

In [None]:
from datetime import datetime

In [None]:
# Local time of this run as YYYYMMDD_HHMMSS, embedded in the CSV file name.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
filename = "amazon_products_" + timestamp + ".csv"

In [None]:
# One row per scraped product; the columns come from the dict keys.
df = pd.DataFrame(data1)

In [None]:
df.head()

In [59]:
# Write the table to the timestamped CSV (a relative path, so it lands in the
# current working directory), leaving out the index column.
df.to_csv(filename, index=False)