In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Amazon India search-results URL for the "smartphones" query; the page
# number is appended as an extra query parameter by fetch_product_data.
BASE_URL = "https://www.amazon.in/s?k=smartphones"

# Request headers sent with every page fetch: a desktop Chrome User-Agent
# string and an English Accept-Language preference.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/111.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}

def _parse_product(product):
    """Extract the title, price, rating and brand from one search result.

    Args:
        product: A parsed search-result element exposing ``find`` (a
            BeautifulSoup tag for a ``s-search-result`` div).

    Returns:
        A dict with the keys ``Title``, ``Price``, ``Rating`` and ``Brand``.
        Any field that cannot be located is reported as ``"N/A"``.
    """
    title_tag = product.find("h2")
    # An <h2> that exists but has no text is treated like a missing title,
    # so the brand lookup below never indexes into an empty word list.
    title = (title_tag.text.strip() if title_tag else "") or "N/A"

    price_tag = product.find("span", "a-price-whole")
    # Thousands separators are removed so the value is a plain digit string.
    price = price_tag.text.replace(',', '').strip() if price_tag else "N/A"

    rating_tag = product.find("span", "a-icon-alt")
    # The first whitespace-separated token of the rating text is kept.
    rating_tokens = rating_tag.text.split() if rating_tag else []
    rating = rating_tokens[0] if rating_tokens else "N/A"

    # Heuristic: the brand is taken to be the first word of the title.
    brand = title.split()[0] if title != "N/A" else "N/A"

    return {
        "Title": title,
        "Price": price,
        "Rating": rating,
        "Brand": brand,
    }


def fetch_product_data(page, timeout=10):
    """Fetch one search-results page and return its products.

    Args:
        page: Page number appended to ``BASE_URL`` as ``&page=<page>``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole scrape.

    Returns:
        A list of dicts (``Title``, ``Price``, ``Rating``, ``Brand``), one
        per search result found. An empty list is returned when the request
        fails, times out, or responds with a non-200 status; the failure is
        reported on stdout instead of being raised.
    """
    url = f"{BASE_URL}&page={page}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures are handled like a bad status code so one
        # unreachable page does not abort the caller's loop.
        print(f"Failed to fetch page {page}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch page {page}: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    products = soup.find_all("div", {"data-component-type": "s-search-result"})

    data = []
    for product in products:
        try:
            data.append(_parse_product(product))
        except Exception as e:
            # Best-effort scraping: one malformed result is reported and
            # skipped rather than discarding the rest of the page.
            print(f"Error while processing a product: {e}")

    return data

# Driver: request search-result pages one at a time until MAX_PRODUCTS rows
# have been collected or MAX_PAGES pages have been requested, then write the
# collected rows to a CSV file.
MAX_PRODUCTS = 200       # number of rows to keep in the output file
MAX_PAGES = 20           # upper bound on pages requested
PAGE_DELAY_SECONDS = 2   # pause between consecutive page requests
OUTPUT_PATH = "C:/Users/Admin/Desktop/Projects/AIML intern assignment/scrapped_amazon_products.csv"

all_data = []
for page in range(1, MAX_PAGES + 1):
    print(f"Fetching page {page}...")
    products = fetch_product_data(page)
    all_data.extend(products)

    if len(all_data) >= MAX_PRODUCTS:
        break
    # No need to wait after the final page; nothing is requested after it.
    if page < MAX_PAGES:
        time.sleep(PAGE_DELAY_SECONDS)

# Pages may overshoot the target, so the frame is trimmed to MAX_PRODUCTS.
df = pd.DataFrame(all_data[:MAX_PRODUCTS])
df.to_csv(OUTPUT_PATH, index=False)

# Report the file that was actually written and how many rows it holds.
print(f"Scraping completed. {len(df)} rows saved to '{OUTPUT_PATH}'.")


Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Scraping completed. Data saved to 'amazon_products.csv'.
