# Amazon Web Scraping

In [1]:
# requests lets you send HTTP requests, e.g. to fetch data from websites and APIs.
# BeautifulSoup is used to parse HTML and XML content.
# Parsing means breaking a document down into smaller, more manageable pieces that are easier to work with.
# pandas is used for data manipulation.
# random is used to select a random User-Agent from the list.
# time is used for adding time delays.
import requests   
from bs4 import BeautifulSoup
import pandas as pd
import random
import time

# Rotating User-Agent headers
# Pool of desktop-browser User-Agent strings. One is chosen at random each time
# this cell runs, so the request is less likely to be flagged as a bot.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
]

headers = {
    "User-Agent": random.choice(user_agents),  # picked once, when this dict is built
    "Accept-Language": "en-US,en;q=0.9",
}

# URL to scrape: the Amazon India search-results page for "mobile phones".
url = "https://www.amazon.in/s?k=mobile+phones&crid=26WINVLOVBSPZ&sprefix=%2Caps%2C620&ref=nb_sb_ss_recent_1_0_recent"

# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would hang the cell.
REQUEST_TIMEOUT_SECONDS = 10


def _text_or_na(tag):
    """Return the whitespace-stripped text of ``tag``, or "N/A" if ``tag`` is None."""
    return tag.text.strip() if tag is not None else "N/A"


# Initializing lists for product data before the request, so that `df` is
# always (re)built by this cell -- later cells never see a missing or stale frame.
product_names = []
product_prices = []
product_ratings = []

# Request the page. Network-level failures (DNS, refused connection, timeout)
# are reported with a message instead of aborting the cell with a traceback.
try:
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
except requests.RequestException as exc:
    response = None
    print(f"Failed to fetch the page. Request error: {exc}")

page_fetched = response is not None and response.status_code == 200

if page_fetched:
    soup = BeautifulSoup(response.content, "html.parser")

    # Locating the main slot containing all products.
    # NOTE: a multi-class string only matches when the tag's class attribute is
    # exactly this string, so this lookup is sensitive to markup changes.
    product_container = soup.find("div", {"class": "s-desktop-width-max s-desktop-content s-opposite-dir s-wide-grid-style sg-row"})

    # If the container is not found, search the whole document rather than
    # silently producing zero rows.
    search_root = product_container if product_container is not None else soup

    # Iterating over individual products
    for product in search_root.find_all("div", {"data-component-type": "s-search-result"}):
        # Extracting product name
        name = product.find("h2", {"class": "a-size-medium a-spacing-none a-color-base a-text-normal"})
        # Extracting product price (first matching span inside the result)
        price = product.find("span", {"class": "a-offscreen"})
        # Extracting product rating
        rating = product.find("a", {"class": "a-popover-trigger a-declarative"})

        # Adding the product's fields to the lists; missing fields become "N/A"
        # so the three lists always stay the same length.
        product_names.append(_text_or_na(name))
        product_prices.append(_text_or_na(price))
        product_ratings.append(_text_or_na(rating))
elif response is not None:
    print(f"Failed to fetch the page. Status code: {response.status_code}")

# Saving fetched data to DataFrame (empty, but with the same columns, when
# nothing could be scraped).
data = {
    "Product Name": product_names,
    "Price (₹)": product_prices,
    "Rating": product_ratings,
}
df = pd.DataFrame(data)

if not df.empty:
    # Saving the dataframe to a CSV file
    df.to_csv("amazon_products.csv", index=False)
    print("Scraping completed. Data saved to amazon_products.csv")
    print(df.head())
elif page_fetched:
    # A 200 response with no parsable results: do not claim success and do not
    # overwrite an earlier CSV with a header-only file.
    print("Page fetched but no products were found; amazon_products.csv was not written.")

Scraping completed. Data saved to amazon_products.csv
                                        Product Name Price (₹)  \
0  OnePlus Nord CE4 Lite 5G (Super Silver, 8GB RA...   ₹17,998   
1  OnePlus Nord 4 5G (Oasis Green, 8GB RAM, 256GB...   ₹29,999   
2  Samsung Galaxy M05 (Mint Green, 4GB RAM, 64 GB...    ₹6,299   
3  Samsung Galaxy A05 (Light Green, 6GB, 128GB St...    ₹7,915   
4  realme GT 6T 5G (Fluid Silver,12GB RAM+256GB S...   ₹35,999   

               Rating  
0  4.1 out of 5 stars  
1  4.2 out of 5 stars  
2  4.0 out of 5 stars  
3  3.8 out of 5 stars  
4  4.3 out of 5 stars  


In [2]:
# Preview the first five scraped rows; as the cell's last expression the
# frame is rendered as a rich table. Relies on `df` built by the scraping cell.
df.head(n=5)

Unnamed: 0,Product Name,Price (₹),Rating
0,"OnePlus Nord CE4 Lite 5G (Super Silver, 8GB RA...","₹17,998",4.1 out of 5 stars
1,"OnePlus Nord 4 5G (Oasis Green, 8GB RAM, 256GB...","₹29,999",4.2 out of 5 stars
2,"Samsung Galaxy M05 (Mint Green, 4GB RAM, 64 GB...","₹6,299",4.0 out of 5 stars
3,"Samsung Galaxy A05 (Light Green, 6GB, 128GB St...","₹7,915",3.8 out of 5 stars
4,"realme GT 6T 5G (Fluid Silver,12GB RAM+256GB S...","₹35,999",4.3 out of 5 stars
