In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import random

# Pool of browser User-Agent strings the scraper can present.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64)',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_5_1)',
]

# Headers sent with every request made by scrape_amazon_products below.
# NOTE: random.choice runs once, when this cell executes, so every page of a
# scrape is fetched with the same User-Agent; re-run the cell to pick another.
headers = {
    'User-Agent': random.choice(user_agents),
    'Accept-Language': 'en-US,en;q=0.9',
}

def _parse_search_result(item):
    """Extract the exported fields from one search-result element.

    Args:
        item: BeautifulSoup tag matched by
            ``div[data-component-type="s-search-result"]``.

    Returns:
        dict: ``title``, ``price``, ``rating`` and ``sold_quantity`` as
        strings. A field is ``""`` when its element is not present.
    """
    title_tag = item.h2
    title = title_tag.text.strip() if title_tag else ""

    price_tag = item.select_one(".a-price .a-offscreen")
    price = price_tag.text.strip() if price_tag else ""

    # Only the first whitespace-separated token of the icon's alt text is kept
    # (presumably the numeric score, e.g. "4.5" from "4.5 out of 5 stars").
    rating_tag = item.select_one(".a-icon-alt")
    rating = rating_tag.text.strip().split(" ")[0] if rating_tag else ""

    # Find 'sold_quantity' like "1,000+ bought in past month" -> "1,000+"
    sold_tag = item.find("span", string=lambda text: text and "bought in past" in text)
    sold_quantity = sold_tag.text.strip().split(" ")[0] if sold_tag else ""

    return {
        "title": title,
        "price": price,
        "rating": rating,
        "sold_quantity": sold_quantity,
    }


def scrape_amazon_products(search_term, pages=2):
    """Scrape Amazon search-result pages for ``search_term``.

    Pages 1..``pages`` are requested in order using the module-level
    ``headers``. Scraping stops at the first page whose request raises or
    returns a non-200 status; products collected before that point are still
    returned.

    Args:
        search_term: Free-text query. It is URL-encoded here, so pass it
            unencoded (spaces become ``+``, ``&`` becomes ``%26``, ...).
        pages: Number of result pages to request.

    Returns:
        list[dict]: One dict per parsed result with the keys ``title``,
        ``price``, ``rating`` and ``sold_quantity``.
    """
    # Local import keeps this definition self-contained within its cell.
    from urllib.parse import quote_plus

    products = []
    base_url = "https://www.amazon.com/s?k={query}&page={page_num}"
    # quote_plus escapes reserved characters that a bare space->"+" replace
    # would leave in place and that would otherwise alter the query string.
    query = quote_plus(search_term)

    for page in range(1, pages + 1):
        url = base_url.format(query=query, page_num=page)
        print(f"Scraping page {page}: {url}")

        try:
            # Without a timeout a stalled connection would block indefinitely.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            # Keep what was already scraped instead of losing it to a traceback.
            print(f"Request failed: {e}")
            break

        if response.status_code != 200:
            print("Request blocked or failed.")
            break

        soup = BeautifulSoup(response.content, 'html.parser')
        results = soup.select('div[data-component-type="s-search-result"]')

        for item in results:
            try:
                products.append(_parse_search_result(item))
            except Exception as e:
                # One malformed result must not abort the rest of the page.
                print("Error parsing product:", e)

        # Sleep to avoid detection
        time.sleep(random.uniform(2, 5))

    return products

def save_to_csv(data, filename="amazon_products Raincoat.csv"):
    """Write product records to a CSV file, replacing any existing file.

    Args:
        data: Iterable of dicts keyed by ``title``, ``price``, ``rating`` and
            ``sold_quantity``.
        filename: Destination path of the CSV file.
    """
    columns = ["title", "price", "rating", "sold_quantity"]
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns)
        csv_writer.writeheader()
        for record in data:
            csv_writer.writerow(record)

if __name__ == "__main__":
    # Scrape the "Raincoat" listing, then export it with the default filename.
    # Raise `pages` gradually if more results are needed.
    products = scrape_amazon_products(search_term="Raincoat", pages=10)
    save_to_csv(data=products)
    print(f"Saved {len(products)} products to CSV.")

Scraping page 1: https://www.amazon.com/s?k=Raincoat&page=1
Scraping page 2: https://www.amazon.com/s?k=Raincoat&page=2
Scraping page 3: https://www.amazon.com/s?k=Raincoat&page=3
Scraping page 4: https://www.amazon.com/s?k=Raincoat&page=4
Scraping page 5: https://www.amazon.com/s?k=Raincoat&page=5
Scraping page 6: https://www.amazon.com/s?k=Raincoat&page=6
Scraping page 7: https://www.amazon.com/s?k=Raincoat&page=7
Scraping page 8: https://www.amazon.com/s?k=Raincoat&page=8
Scraping page 9: https://www.amazon.com/s?k=Raincoat&page=9
Scraping page 10: https://www.amazon.com/s?k=Raincoat&page=10
Saved 414 products to CSV.


In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import random

# Proxy URLs the scraper chooses from at random for each request.
# NOTE(review): the first and third active entries contain octets above 255
# (456, 333, 444), so they cannot be valid IPv4 hosts -- they look like
# placeholders and should be replaced with working proxies before running.
proxies_list = [
    "http://123.456.78.90:8080",
    "http://98.76.54.32:3128",
    "http://111.222.333.444:8000",
    # "http://144.22.175.58",
    # "http://57.129.81.201",
    # "http://117.6.51.7",
    # "http://116.108.8.140",
    # "http://207.166.179.110",
    # "http://207.166.179.107"
    # Add more proxies here
]

# User-Agent strings to rotate through; one is picked at random per request.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64)',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_5_1)',
]

def _fetch_search_page(url):
    """GET ``url`` with a random proxy and User-Agent; return the response or None.

    Args:
        url: Fully formed search-page URL.

    Returns:
        The response object when the request succeeded with status 200,
        otherwise ``None`` (the failure is printed).
    """
    # Randomize proxy and headers per request. With an empty pool fall back to
    # no explicit proxy instead of letting random.choice raise IndexError.
    proxy = None
    if proxies_list:
        proxy = {"http": random.choice(proxies_list), "https": random.choice(proxies_list)}
    headers = {
        'User-Agent': random.choice(user_agents),
        'Accept-Language': 'en-US,en;q=0.9',
    }

    try:
        response = requests.get(url, headers=headers, proxies=proxy, timeout=10)
    except Exception as e:
        # Deliberately broad: any failure on this page is reported and skipped.
        print(f"Request failed due to proxy error: {e}")
        return None

    if response.status_code != 200:
        print(f"Request blocked or failed. Status: {response.status_code}")
        return None

    return response


def _parse_search_result(item):
    """Extract the exported fields from one search-result element.

    Args:
        item: BeautifulSoup tag matched by
            ``div[data-component-type="s-search-result"]``.

    Returns:
        dict: ``title``, ``price``, ``rating`` and ``sold_quantity`` as
        strings. A field is ``""`` when its element is not present.
    """
    title_tag = item.h2
    title = title_tag.text.strip() if title_tag else ""

    price_tag = item.select_one(".a-price .a-offscreen")
    price = price_tag.text.strip() if price_tag else ""

    # Only the first whitespace-separated token of the icon's alt text is kept
    # (presumably the numeric score, e.g. "4.5" from "4.5 out of 5 stars").
    rating_tag = item.select_one(".a-icon-alt")
    rating = rating_tag.text.strip().split(" ")[0] if rating_tag else ""

    # First token of the "... bought in past ..." badge, when one is present.
    sold_tag = item.find("span", string=lambda text: text and "bought in past" in text)
    sold_quantity = sold_tag.text.strip().split(" ")[0] if sold_tag else ""

    return {
        "title": title,
        "price": price,
        "rating": rating,
        "sold_quantity": sold_quantity,
    }


def scrape_amazon_products(search_term, pages=2):
    """Scrape Amazon search-result pages for ``search_term`` via rotating proxies.

    Every page 1..``pages`` is attempted. A page whose request fails or
    returns a non-200 status is skipped and scraping continues with the next
    page. The randomized delay runs after every attempt, including failed
    ones, so consecutive failures are not retried back-to-back.

    Args:
        search_term: Free-text query. It is URL-encoded here, so pass it
            unencoded (spaces become ``+``, ``&`` becomes ``%26``, ...).
        pages: Number of result pages to request.

    Returns:
        list[dict]: One dict per parsed result with the keys ``title``,
        ``price``, ``rating`` and ``sold_quantity``.
    """
    # Local import keeps this definition self-contained within its cell.
    from urllib.parse import quote_plus

    products = []
    base_url = "https://www.amazon.com/s?k={query}&page={page_num}"
    # quote_plus escapes reserved characters that a bare space->"+" replace
    # would leave in place and that would otherwise alter the query string.
    query = quote_plus(search_term)

    for page in range(1, pages + 1):
        url = base_url.format(query=query, page_num=page)
        print(f"Scraping page {page}: {url}")

        response = _fetch_search_page(url)
        if response is not None:
            soup = BeautifulSoup(response.content, 'html.parser')
            results = soup.select('div[data-component-type="s-search-result"]')

            for item in results:
                try:
                    products.append(_parse_search_result(item))
                except Exception as e:
                    # One malformed result must not abort the rest of the page.
                    print("Error parsing product:", e)

        # Delay to avoid detection
        time.sleep(random.uniform(3, 7))

    return products

def save_to_csv(data, filename="amazon_products Raincoat.csv"):
    """Dump scraped product dicts to ``filename`` as UTF-8 CSV with a header row.

    Args:
        data: Iterable of dicts using the keys ``title``, ``price``,
            ``rating`` and ``sold_quantity``.
        filename: Path of the CSV file to create or overwrite.
    """
    header = ["title", "price", "rating", "sold_quantity"]
    csv_file = open(filename, mode='w', newline='', encoding='utf-8')
    with csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=header)
        dict_writer.writeheader()
        for product in data:
            dict_writer.writerow(product)

if __name__ == "__main__":
    # Scrape ten pages of "Raincoat" results, then export them with the
    # default filename.
    products = scrape_amazon_products(search_term="Raincoat", pages=10)
    save_to_csv(data=products)
    print(f"Saved {len(products)} products to CSV.")