In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import csv
import re


In [8]:

# Search-result pages to scrape. Each entry is opened with driver.get() by the
# scraping loop, which then follows the "Next" link from there.
urls = [
    'https://www.flipkart.com/search?q=washing+machine&otracker=AS_Query_HistoryAutoSuggest_4_0&otracker1=AS_Query_HistoryAutoSuggest_4_0&marketplace=FLIPKART&as-show=on&as=off&as-pos=4&as-type=HISTORY&page=1',
]

# One Chrome session shared by every URL above.
driver = webdriver.Chrome()


In [9]:
# List to store scraped data before writing to CSV
data_list = []

# Prefix prepended to the relative product links found on result cards
FLIPKART_BASE_URL = "https://www.flipkart.com"
# Placeholder stored in a column when the page does not provide the value
MISSING_VALUE = "N/A"


def _first_number(text):
    """Return the first run of digits in `text`, or None when there is none."""
    match = re.search(r'\d+', text)
    return match.group() if match else None


def _split_rating_counts(text):
    """Split a "ratings & reviews" summary into two digit-only strings.

    The text is split on "&"; the digits of the left part become the rating
    count and the digits of the right part the review count. Returns
    ("N/A", "N/A") when the text does not split into exactly two parts.
    """
    parts = text.split("&")
    if len(parts) != 2:
        return MISSING_VALUE, MISSING_VALUE
    return (
        ''.join(filter(str.isdigit, parts[0])),
        ''.join(filter(str.isdigit, parts[1])),
    )


def _parse_specs(spec_texts):
    """Derive (max_speed, star_rating, heater, warranty) from spec bullet texts.

    Each value defaults to "N/A". A bullet that mentions a field but carries no
    usable number leaves that field at "N/A" instead of raising.
    """
    max_speed = star_rating = heater = warranty = MISSING_VALUE

    for raw_text in spec_texts:
        detail_text = raw_text.strip().lower()

        if "rpm max speed" in detail_text:
            number = _first_number(detail_text)
            if number:
                max_speed = number + " RPM"
        elif "star rating" in detail_text:
            number = _first_number(detail_text)
            if number:
                star_rating = number + " Star"
        elif "in-built heater" in detail_text:
            heater = "Yes"
        elif "warranty" in detail_text:
            warranty_match = re.search(r"(\d+) years?", detail_text)
            if warranty_match:
                warranty = warranty_match.group(1) + " Years"

    return max_speed, star_rating, heater, warranty


def _clean_price(tag):
    """Return the tag's text without the rupee sign and thousands separators, or "N/A"."""
    if not tag:
        return MISSING_VALUE
    return tag.text.strip().replace("₹", "").replace(",", "")


def _clean_discount(tag):
    """Return the tag's text without "off" and "%", or "N/A".

    Stripping happens after the removals so that "46% off" becomes "46"
    rather than "46 " with a trailing space.
    """
    if not tag:
        return MISSING_VALUE
    return tag.text.replace("off", "").replace("%", "").strip()


def _parse_product_card(card):
    """Build one CSV row (a 12-item list) from a single product card element.

    Any element that is missing from the card yields "N/A" in its column, so a
    single malformed card cannot abort the whole scrape.
    """
    link = card.find('a', class_="CGtC98")
    href = link.get('href') if link else None
    product_url = FLIPKART_BASE_URL + href if href else MISSING_VALUE

    name_tag = card.find('div', class_="KzDlHZ")
    name = name_tag.text.strip() if name_tag else MISSING_VALUE

    rating_tag = card.find('div', class_="XQDdHH")
    rating = rating_tag.get_text(strip=True) if rating_tag else MISSING_VALUE

    counts_tag = card.find('span', class_="Wphh3N")
    if counts_tag:
        num_rating, num_reviews = _split_rating_counts(counts_tag.get_text(strip=True))
    else:
        num_rating, num_reviews = MISSING_VALUE, MISSING_VALUE

    # Extract washing machine specifications
    spec_box = card.find('div', class_="_6NESgJ")
    spec_items = spec_box.find_all('li', class_="J+igdf") if spec_box else []
    max_speed, star_rating, heater, warranty = _parse_specs(
        item.text for item in spec_items
    )

    price = _clean_price(card.find('div', class_="Nx9bqj _4b5DiR"))
    original_price = _clean_price(card.find('div', class_="yRaY8j ZYYwLA"))
    discount = _clean_discount(card.find('div', class_="UkUFwK"))

    return [
        product_url, name, rating, num_rating, num_reviews,
        max_speed, star_rating, heater, warranty,
        price, original_price, discount
    ]


# NOTE(review): the browser session is left open after this cell so it can be
# re-run; call driver.quit() once scraping is finished.
for url in urls:
    driver.get(url)
    time.sleep(4)
    print(f"Scraping data from {url}")
    last_page = None

    while True:
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')

        # Extract current page number: the second whitespace-separated word of
        # the pagination label (presumably "Page X of Y" - confirm on the site).
        page_info = soup.find("div", class_="_1G0WLw")
        page_span = page_info.find("span") if page_info else None
        page_words = page_span.text.split() if page_span else []
        if len(page_words) > 1:
            current_page = page_words[1]
            print(f"📄 Scraping details from Page {current_page}...")
            # Seeing the same page number twice means the click did not advance
            if current_page == last_page:
                print("✅ Scraping completed. No more pages available.")
                break
            last_page = current_page

        # Extract product details
        for container in soup.find_all('div', class_="DOjaWF gdgoEp"):
            rows = container.find_all('div', class_="cPHDOP col-12-12")[:-2]  # Exclude pagination elements

            for row in rows:
                for card in row.find_all('div', class_="tUxRFH"):
                    record = _parse_product_card(card)

                    # Append data to list
                    data_list.append(record)

                    print(f"✅ Scraped: {record[1]}")

        # Locate and click the "Next" button
        try:
            next_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//a[@class='_9QVEpD' and span[text()='Next']]"))
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", next_button)
            time.sleep(2)
            next_button.click()
            print("➡️ Moving to the next page...\n")
            time.sleep(3)
        except Exception as e:
            # Best effort: any failure to find or click "Next" ends this URL
            print("✅ Scraping completed or no more pages available.", e)
            break


Scraping data from https://www.flipkart.com/search?q=washing+machine&otracker=AS_Query_HistoryAutoSuggest_4_0&otracker1=AS_Query_HistoryAutoSuggest_4_0&marketplace=FLIPKART&as-show=on&as=off&as-pos=4&as-type=HISTORY&page=1
📄 Scraping details from Page 1...
✅ Scraped: ONIDA 9 kg Washer only White, Maroon
✅ Scraped: ONIDA 9 kg Washer only Black
✅ Scraped: MarQ by Flipkart 6 kg 5 Star Rating Innowash Range Semi Automatic Top Load Washing Machine White, Maro...
✅ Scraped: Whirlpool 6.5 kg Fully Automatic Top Load Washing Machine with In-built Heater Grey
✅ Scraped: Voltas Beko 7.5 kg Semi Automatic Top Load Washing Machine Grey, White
✅ Scraped: Thomson 7 kg Washer only Red, White
✅ Scraped: realme TechLife 7 kg 5 Star rating Semi Automatic Top Load Washing Machine White, Black
✅ Scraped: Whirlpool 7 kg 5 Star with Ace Wash Station, 1400 RPM Speed and Rust Proof Semi Automatic Top Load Was...
✅ Scraped: realme TechLife 7.5 kg 5 Star Rating Semi Automatic Top Load Washing Machine Black, Gre

In [10]:
# Dump everything collected in data_list to one CSV file: a header row naming
# the twelve columns, followed by one row per scraped product.
csv_filename = "flipkart_washing_machines.csv"

# newline="" lets the csv module control line endings itself
with open(csv_filename, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows([
        [
            "Product URL", "Name", "Rating", "Number of Ratings", "Number of Reviews",
            "Max Speed (RPM)", "Star Rating", "In-Built Heater", "Warranty",
            "Price (₹)", "Original Price (₹)", "Discount (%)",
        ],
        *data_list,
    ])

print(f"📂 Data successfully saved in '{csv_filename}'")


📂 Data successfully saved in 'flipkart_washing_machines.csv'
