In [16]:
import csv
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [17]:
# Flipkart search-result listings to scrape. Each entry is the first page of a
# listing; the scraping loop follows the "Next" link from there.
urls = [
    'https://www.flipkart.com/search?q=ac&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off',
]

In [18]:
# Start a Chrome session through Selenium; the scraping cell uses this
# `driver` object to load each URL and click through the result pages.
# NOTE(review): no visible cell calls driver.quit() — confirm the browser is
# closed elsewhere, otherwise the Chrome process may be left running.
driver = webdriver.Chrome()

In [19]:
# Rows scraped from every result page; each row is a list of 11 fields in the
# order built by _parse_card(). The CSV-writing cell consumes this list.
data_list = []

_SITE_ROOT = "https://www.flipkart.com"
_MISSING = "N/A"  # placeholder stored when a field is absent from a card

# Spec-line patterns, compiled once instead of on every <li>.
_POWER_RE = re.compile(r'Annual Power Usage:\s*([\d.]+\s*[WkWh]*)', re.IGNORECASE)
_ROOM_SIZE_RE = re.compile(r'Room Size:\s*([\d\s\-]+ sqft)', re.IGNORECASE)
_WARRANTY_RE = re.compile(r'(\d+\s*Year.*?)$', re.IGNORECASE)


def _current_page(soup):
    """Return the second word of the pagination label, or None.

    The label is presumably of the form "Page 3 of 25" (TODO confirm against
    the live markup). None is returned when the pagination container, its
    <span>, or the second word is missing, instead of raising.
    """
    page_info = soup.find("div", class_="_1G0WLw")
    span = page_info.find("span") if page_info else None
    words = span.text.split() if span else []
    return words[1] if len(words) > 1 else None


def _clean(tag, *drop):
    """Return the tag's text with every `drop` substring removed, then trimmed.

    Returns the "N/A" placeholder when `tag` is None.
    """
    if tag is None:
        return _MISSING
    text = tag.text
    for piece in drop:
        text = text.replace(piece, "")
    # Trim last, so removing "off" from "25% off" leaves no dangling space.
    return text.strip()


def _rating_counts(tag):
    """Return (num_ratings, num_reviews) as digit-only strings.

    The tag text is split on "&"; unless that yields exactly two parts (or
    when `tag` is None) both values are the "N/A" placeholder.
    """
    if tag is not None:
        parts = tag.get_text(strip=True).split("&")
        if len(parts) == 2:
            return tuple(''.join(filter(str.isdigit, part)) for part in parts)
    return _MISSING, _MISSING


def _specs(details):
    """Return (annual_power, room_size, warranty) parsed from the spec list.

    Every <li class="J+igdf"> under `details` is tested against all three
    patterns; if several items match the same pattern, the last one wins.
    Fields that never match stay "N/A".
    """
    annual_power = room_size = warranty = _MISSING
    if details is None:
        return annual_power, room_size, warranty

    for item in details.find_all('li', class_="J+igdf"):
        line = item.text.strip()

        power = _POWER_RE.search(line)
        if power:
            # The pattern's optional \s* can capture a trailing blank when no
            # unit follows the number.
            annual_power = power.group(1).strip()

        size = _ROOM_SIZE_RE.search(line)
        if size:
            room_size = size.group(1)

        years = _WARRANTY_RE.search(line)
        if years:
            warranty = years.group(1)

    return annual_power, room_size, warranty


def _parse_card(card):
    """Build one data_list row from a product card (<div class="tUxRFH">).

    Row order: product URL, name, rating, number of ratings, number of
    reviews, annual power, room size, warranty, price, original price,
    discount. Any element missing from the card yields "N/A" for that field
    rather than an exception.
    """
    link = card.find('a', class_="CGtC98")
    href = link.get('href') if link else None
    product_url = _SITE_ROOT + href if href else _MISSING

    name = _clean(card.find('div', class_="KzDlHZ"))

    rating_tag = card.find('div', class_="XQDdHH")
    rating = rating_tag.get_text(strip=True) if rating_tag else _MISSING

    num_rating, num_reviews = _rating_counts(card.find('span', class_="Wphh3N"))
    annual_power, room_size, warranty = _specs(card.find('div', class_="_6NESgJ"))

    price = _clean(card.find('div', class_="Nx9bqj _4b5DiR"), "₹", ",")
    original_price = _clean(card.find('div', class_="yRaY8j ZYYwLA"), "₹", ",")
    discount = _clean(card.find('div', class_="UkUFwK"), "off", "%")

    return [
        product_url, name, rating, num_rating, num_reviews,
        annual_power, room_size, warranty,
        price, original_price, discount,
    ]


for url in urls:
    driver.get(url)
    time.sleep(4)  # fixed wait for the first page to render
    print(f"Scraping data from {url}")
    last_page = None

    while True:
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Stop when the page label did not change after clicking "Next".
        current_page = _current_page(soup)
        if current_page is not None:
            print(f"📄 Scraping details from Page {current_page}...")

            if current_page == last_page:
                print("✅ Scraping completed. No more pages available.")
                break
            last_page = current_page

        # Extract product details
        for container in soup.find_all('div', class_="DOjaWF gdgoEp"):
            # The last two rows are dropped (pagination elements, per the
            # original author's note).
            rows = container.find_all('div', class_="cPHDOP col-12-12")[:-2]

            for row in rows:
                for card in row.find_all('div', class_="tUxRFH"):
                    record = _parse_card(card)
                    data_list.append(record)

                    # Indices follow the row order documented in _parse_card().
                    name = record[1]
                    annual_power, room_size, warranty = record[5], record[6], record[7]
                    print(f"Annual Power Usage: {annual_power}, Room Size: {room_size}, Warranty: {warranty}")
                    print(f"✅ Scraped: {name}")

        # Locate and click the "Next" button
        try:
            next_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//a[@class='_9QVEpD' and span[text()='Next']]"))
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", next_button)
            time.sleep(2)
            next_button.click()
            print("➡️ Moving to the next page...\n")
            time.sleep(3)
        except TimeoutException as e:
            # No clickable "Next" link appeared within 5 s: normal end of the listing.
            print("✅ Scraping completed or no more pages available.", e)
            break
        except WebDriverException as e:
            # A real browser/driver failure; keep the rows collected so far
            # but do not report it as a clean finish.
            print("⚠️ Scraping stopped early because of a browser error:", e)
            break


Scraping data from https://www.flipkart.com/search?q=ac&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off
📄 Scraping details from Page 1...
Annual Power Usage: 852.44 kWh, Room Size: 111 - 150 sqft, Warranty: 1 Year Warranty on Product,  5 Years on PCB and 10 Years on Compressor with Gas Charging
✅ Scraped: LG 2025 Mode AI Convertible 6-in-1 1.5 Ton 3 Star Split AI Dual Inverter with Faster Cooling and Energ...
Annual Power Usage: 852.44 kWh, Room Size: 111 - 150 sqft, Warranty: 1 Year Warranty on Product, 5 Years Warranty on PCB and 10 Years Warranty on Compressor from LG
✅ Scraped: LG AI Convertible 6-in-1 Cooling 2024 Model 1.5 Ton 3 Star Split Dual Inverter HD Filter with Anti-Vir...
Annual Power Usage: 553.16 kWh, Room Size: 90 sqft, Warranty: 1 Year Warranty on Product and 10 Years Warranty on Compressor
✅ Scraped: MarQ by Flipkart 0.75 Ton 3 Star Split Inverter 4-in-1 Convertible with Turbo Cool Technology AC  - Wh...
Annual Power Usage: 1006.84 kWh, Room S

In [20]:
# Output file for the scraped rows (created or overwritten in the working directory).
csv_filename = "flipkart_air_conditioners.csv"

# Column titles, in the same order as the fields of each row in data_list.
# NOTE(review): the captured output shows power values such as "852.44 kWh",
# so the "(W)" label looks inaccurate — confirm before renaming, since
# downstream readers may depend on the exact column names.
header_row = [
    "Product URL", "Name", "Rating", "Number of Ratings", "Number of Reviews",
    "Power Usage (W)", "Room Size (sqft)", "Warranty",
    "Price (₹)", "Original Price (₹)", "Discount (%)",
]

# newline="" lets the csv module control line endings itself.
with open(csv_filename, "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    # Header first, then every scraped row, in a single call.
    writer.writerows([header_row, *data_list])

print(f"📂 Data successfully saved in '{csv_filename}' ✅")


📂 Data successfully saved in 'flipkart_air_conditioners.csv' ✅
