<a href="https://colab.research.google.com/github/Jeevana023/Skillcraft_technologies/blob/main/E_commerce_Product_Scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import csv
from bs4 import BeautifulSoup
import requests # You'll need to install this: pip install requests
import re # For regular expressions, useful for cleaning data

# Seconds to wait for the remote server before giving up on a request.
_REQUEST_TIMEOUT_SECONDS = 10

# Matches every character that is neither a digit nor a decimal point.
_NON_NUMERIC = re.compile(r'[^\d.]')

# Built-in sample listing page, parsed when no URL is supplied.
_SIMULATED_HTML = """
    <html>
    <head><title>Product Listing</title></head>
    <body>
        <h1>Featured Products</h1>
        <div class="product-item" data-product-id="101">
            <h2 class="product-name">Super Widget Pro</h2>
            <span class="product-price">$29.99</span>
            <div class="product-rating">
                <span class="stars">★★★★☆</span>
                <span class="reviews">(125 reviews)</span>
            </div>
        </div>
        <div class="product-item" data-product-id="102">
            <h2 class="product-name">Mega Gadget Elite</h2>
            <span class="product-price">$149.50</span>
            <div class="product-rating">
                <span class="stars">★★★★★</span>
                <span class="reviews">(340 reviews)</span>
            </div>
        </div>
        <div class="product-item" data-product-id="103">
            <h2 class="product-name">Basic Gizmo Standard</h2>
            <span class="product-price">Free</span>
            <div class="product-rating">
                <span class="stars">★★★☆☆</span>
                <span class="reviews">(50 reviews)</span>
            </div>
        </div>
        <div class="product-item" data-product-id="104">
            <h2 class="product-name">Advanced Doodad X</h2>
            <span class="product-price">$75.00</span>
            <div class="product-rating">
                <span class="stars">★★★★☆</span>
                <span class="reviews">(200 reviews)</span>
            </div>
        </div>
        <!-- Another product without a rating for robustness test -->
        <div class="product-item" data-product-id="105">
            <h2 class="product-name">Simple Tool</h2>
            <span class="product-price">$10.00</span>
            <div class="product-rating">
                <!-- No stars here -->
                <span class="reviews">(10 reviews)</span>
            </div>
        </div>
    </body>
    </html>
    """


def _fetch_html(url):
    """Download *url* and return the response text, or None if the request fails."""
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Treat 4xx/5xx responses as failures.
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL {url}: {e}")
        return None
    return response.text


def _element_text(element):
    """Return the stripped text of a parsed element, or 'N/A' if it is missing."""
    return element.get_text(strip=True) if element else 'N/A'


def _parse_price(raw_price):
    """Return *raw_price* as a float, or the text unchanged if it is not numeric."""
    # Drop currency symbols, thousands separators and other decoration.
    cleaned = _NON_NUMERIC.sub('', raw_price)
    try:
        return float(cleaned)
    except ValueError:
        # Non-numeric prices such as "Free" (or the 'N/A' placeholder) clean
        # down to an empty string; report the original text, not ''.
        return raw_price


def scrape_product_info(url=None):
    """
    Scrapes product information (name, price, rating) from a product listing.

    Args:
        url: Address of the listing page to download. When omitted (or
            empty), the built-in simulated HTML page is parsed instead.

    Returns:
        A list with one dict per product container, keyed by 'Name',
        'Price' and 'Rating'. 'Price' is a float when the price text is
        numeric and the original text otherwise (e.g. 'Free'). Any element
        missing from a container is reported as 'N/A'. An empty list is
        returned when the page cannot be downloaded.
    """
    print(f"Attempting to scrape from: {url if url else 'Simulated HTML'}")

    if url:
        html_content = _fetch_html(url)
        if html_content is None:
            return []
    else:
        html_content = _SIMULATED_HTML

    soup = BeautifulSoup(html_content, 'html.parser')

    # The tag and class names below match the simulated page; a real site
    # needs selectors taken from its own markup.
    product_containers = soup.find_all('div', class_='product-item')

    if not product_containers:
        print("No product containers found. Check your 'product-item' class selector.")

    products_data = []
    for container in product_containers:
        name = container.find('h2', class_='product-name')
        price = container.find('span', class_='product-price')
        rating_stars = container.find('span', class_='stars')

        products_data.append({
            'Name': _element_text(name),
            'Price': _parse_price(_element_text(price)),
            'Rating': _element_text(rating_stars),
        })

    return products_data

def save_to_csv(data, filename="products.csv"):
    """
    Writes product records to a CSV file with Name, Price and Rating columns.

    Args:
        data: List of dicts keyed by 'Name', 'Price' and 'Rating'.
        filename: Path of the file to write; it is opened in 'w' mode, so
            an existing file is overwritten.

    A status message is printed in every case: when there is nothing to
    write, when the file was written, or when writing failed with an I/O
    error (which is reported rather than raised).
    """
    if data:
        # Column order of the CSV; these are the keys of each record.
        columns = ['Name', 'Price', 'Rating']

        try:
            with open(filename, mode='w', newline='', encoding='utf-8') as out_file:
                csv_writer = csv.DictWriter(out_file, fieldnames=columns)
                csv_writer.writeheader()
                for record in data:
                    csv_writer.writerow(record)
            print(f"Product data successfully saved to {filename}")
        except OSError as err:
            print(f"Error saving to CSV file {filename}: {err}")
    else:
        print("No data to save.")

if __name__ == "__main__":
    # Demonstration run: scrape, print each record, then export to CSV.
    # This call uses the simulated HTML. For a real website, provide its
    # address instead, e.g.:
    #   scrape_product_info("https://www.example-ecommerce.com/products")
    extracted_products = scrape_product_info()

    if not extracted_products:
        print("No product data extracted.")
    else:
        print("\nExtracted Product Data:")
        for product in extracted_products:
            print(product)
        print("\n")
        save_to_csv(extracted_products, "ecommerce_products.csv")

Attempting to scrape from: Simulated HTML

Extracted Product Data:
{'Name': 'Super Widget Pro', 'Price': 29.99, 'Rating': '★★★★☆'}
{'Name': 'Mega Gadget Elite', 'Price': 149.5, 'Rating': '★★★★★'}
{'Name': 'Basic Gizmo Standard', 'Price': '', 'Rating': '★★★☆☆'}
{'Name': 'Advanced Doodad X', 'Price': 75.0, 'Rating': '★★★★☆'}
{'Name': 'Simple Tool', 'Price': 10.0, 'Rating': 'N/A'}


Product data successfully saved to ecommerce_products.csv
