In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re # For cleaning text


def scrape_mock_product_data(search_query):
    """
    Simulate scraping product data from a mock HTML page.

    No network request is made: a fixed HTML document containing four
    product listings is generated with ``search_query`` embedded in the
    title, heading and product names, and is then parsed with BeautifulSoup.

    Args:
        search_query: Text typed by the user. It is HTML-escaped before being
            embedded, so characters such as ``<``, ``>`` and ``&`` end up in
            the product names as literal text instead of being parsed as
            markup.

    Returns:
        A list of dicts, one per ``div.product-item`` found, with the keys:
          - ``'Name'``: text of the ``h3.product-name`` element, or ``'N/A'``.
          - ``'Price'``: the price as a ``float`` once every character other
            than digits and ``.`` has been removed; if that conversion fails
            the uncleaned price text (or ``'N/A'``) is kept as a string.
          - ``'Rating'``: text of the ``div.product-rating`` element with the
            ``Rating:`` label removed, or ``'N/A'``.
        An empty list is returned when no product items are found.
    """
    # Imported locally so the block does not depend on a file-level import.
    from html import escape

    print(f"Simulating search for: '{search_query}'")

    # The query is untrusted text going into markup: escape it so that input
    # like "<b>" or "a & b" cannot alter the document structure. The parser
    # decodes the entities again, so extracted names contain the raw query.
    safe_query = escape(str(search_query))

    # This is a MOCK HTML string representing typical product listings.
    # In a real scenario, this would be the content fetched from a URL.
    mock_html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Mock Product Search - {safe_query}</title>
    </head>
    <body>
        <div class="product-listing">
            <h2>Search Results for "{safe_query}"</h2>
            <div class="product-item" data-product-id="101">
                <h3 class="product-name">Luxury {safe_query} Pro X</h3>
                <p class="product-price">Price: &#8377; 75,999</p>
                <div class="product-rating">Rating: 4.5 out of 5 stars</div>
                <a href="/product/101">View Details</a>
            </div>
            <div class="product-item" data-product-id="102">
                <h3 class="product-name">Budget {safe_query} Lite</h3>
                <p class="product-price">Price: &#8377; 12,499</p>
                <div class="product-rating">Rating: 3.8 out of 5 stars</div>
                <a href="/product/102">View Details</a>
            </div>
            <div class="product-item" data-product-id="103">
                <h3 class="product-name">Premium {safe_query} Max 2000</h3>
                <p class="product-price">Price: &#8377; 1,20,000</p>
                <div class="product-rating">Rating: 4.9 out of 5 stars</div>
                <a href="/product/103">View Details</a>
            </div>
            <div class="product-item" data-product-id="104">
                <h3 class="product-name">Basic {safe_query} Model</h3>
                <p class="product-price">Price: &#8377; 8,999</p>
                <div class="product-rating">Rating: 4.2 out of 5 stars</div>
                <a href="/product/104">View Details</a>
            </div>
        </div>
    </body>
    </html>
    """

    soup = BeautifulSoup(mock_html_content, 'html.parser')
    products = []

    # Find all product items based on their HTML class
    product_items = soup.find_all('div', class_='product-item')

    if not product_items:
        print("No product items found in the mock HTML.")
        return []

    for item in product_items:
        name = item.find('h3', class_='product-name')
        price = item.find('p', class_='product-price')
        rating = item.find('div', class_='product-rating')

        # Extract text and strip the human-readable labels; a missing element
        # is reported as 'N/A' rather than raising.
        product_name = name.get_text(strip=True) if name else 'N/A'
        product_price = price.get_text(strip=True).replace('Price:', '').strip() if price else 'N/A'
        product_rating = rating.get_text(strip=True).replace('Rating:', '').strip() if rating else 'N/A'

        # Drop the currency symbol and thousands separators, keeping only
        # digits and the decimal point so the value can be used numerically.
        digits_only = re.sub(r'[^\d.]', '', product_price)
        try:
            cleaned_price = float(digits_only)
        except ValueError:
            # Nothing numeric was left (e.g. 'N/A'): keep the original text.
            cleaned_price = product_price

        products.append({
            'Name': product_name,
            'Price': cleaned_price,
            'Rating': product_rating,
        })
    return products

def save_to_excel(data, filename="product_details.xlsx"):
    """
    Write product records to an Excel workbook.

    ``data`` is turned into a pandas DataFrame and written to ``filename``
    without the index column. When ``data`` is empty nothing is written and
    a notice is printed instead. Any error raised while writing is reported
    on stdout rather than propagated. Returns ``None`` in every case.
    """
    if data:
        frame = pd.DataFrame(data)
        try:
            frame.to_excel(filename, index=False)
            print(f"\nSuccessfully saved {len(data)} products to '{filename}'")
        except Exception as err:
            # Best-effort save: report the failure and carry on.
            print(f"Error saving to Excel: {err}")
    else:
        print("No data to save to Excel.")

if __name__ == "__main__":
    # Interactive driver: repeatedly ask for a product name, show the mock
    # results and save them to a per-query Excel file until 'exit' is typed.
    while True:
        try:
            search_query = input("Enter product to search for (e.g., 'Laptop', 'Smartphone', 'Headphones') or 'exit' to quit: ").strip()
        except EOFError:
            # Input stream closed (e.g. piped stdin ran out): stop cleanly.
            break

        if search_query.lower() == 'exit':
            break

        if not search_query:
            # A blank query would yield nameless products and a file called
            # '_products.xlsx', so ask again instead of proceeding.
            print("Please enter a product name.")
            continue

        # Simulate scraping for the given query
        product_data = scrape_mock_product_data(search_query)

        if product_data:
            print("\n--- Scraped Product Data ---")
            for product in product_data:
                print(f"Name: {product['Name']}, Price: {product['Price']}, Rating: {product['Rating']}")
            print("----------------------------")

            # Build a filesystem-safe stem: every run of characters other
            # than word characters or '-' (spaces, slashes, colons, ...)
            # becomes a single underscore. Fall back to 'search' if nothing
            # usable remains.
            file_stem = re.sub(r'[^\w-]+', '_', search_query.lower()).strip('_') or 'search'

            # Save the data to an Excel file
            save_to_excel(product_data, f"{file_stem}_products.xlsx")
        else:
            print("Could not retrieve product data for this query.")

    print("Exiting script.")

Enter product to search for (e.g., 'Laptop', 'Smartphone', 'Headphones') or 'exit' to quit: 
Simulating search for: ''

--- Scraped Product Data ---
Name: Luxury  Pro X, Price: 75999.0, Rating: 4.5 out of 5 stars
Name: Budget  Lite, Price: 12499.0, Rating: 3.8 out of 5 stars
Name: Premium  Max 2000, Price: 120000.0, Rating: 4.9 out of 5 stars
Name: Basic  Model, Price: 8999.0, Rating: 4.2 out of 5 stars
----------------------------

Successfully saved 4 products to '_products.xlsx'
Enter product to search for (e.g., 'Laptop', 'Smartphone', 'Headphones') or 'exit' to quit: Samsung S24 Ultra
Simulating search for: 'Samsung S24 Ultra'

--- Scraped Product Data ---
Name: Luxury Samsung S24 Ultra Pro X, Price: 75999.0, Rating: 4.5 out of 5 stars
Name: Budget Samsung S24 Ultra Lite, Price: 12499.0, Rating: 3.8 out of 5 stars
Name: Premium Samsung S24 Ultra Max 2000, Price: 120000.0, Rating: 4.9 out of 5 stars
Name: Basic Samsung S24 Ultra Model, Price: 8999.0, Rating: 4.2 out of 5 stars
----