In [1]:
# importing libraries and packages
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
import pandas as pd

In [2]:
# Set Chrome options for the web driver
options = Options()
# options.add_argument("--headless")  
options.add_argument("--disable-gpu")
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# URL of the Flipkart laptop search results page
url = 'https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off'
driver.get(url)
time.sleep(5)

# Initialize empty lists to store scraped data
titles = []
prices = []
categories = []
ratings = []
number_of_reviews = []
page_counter = 0

In [3]:
# Define the maximum number of pages to scrap
max_pages = 40  

while page_counter < max_pages:
    content = driver.page_source
    soup = BeautifulSoup(content, "html.parser")
# Find product containers
    products = soup.find_all("div", class_="cPHDOP col-12-12")

    for product in products:
        # Extract product name
        title = product.find("div", class_="KzDlHZ")
        if title:
            titles.append(title.text)
        else:
            titles.append("Unknown")

        # Extract price
        price = product.find("div", class_="Nx9bqj _4b5DiR")
        if price:
            prices.append(price.text)
        else:
            prices.append('0')

        #  category is set statically
        categories = 'Laptop'
    
        # Extract rating
        rating = product.find("div", class_="XQDdHH")
        if rating:
            ratings.append(rating.text)
        else:
            ratings.append("Unknown")

        # Extract number of reviews
        reviews = product.find("span",class_="Wphh3N")
        if reviews:
            number_of_reviews.append(reviews.text)
        else:
            number_of_reviews.append("0")  # No reviews found

    try:
        # Find the "Next" button using an XPath expression and wait until it is clickable
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(@class, '_9QVEpD') and span[contains(text(), 'Next')]]"))
        )
        
        print("Next button URL:", next_button.get_attribute("href"))
        
        next_button.click()
        time.sleep(5)  
        
        page_counter += 1   # Increment the page counter after successfully navigating to the next page
    except Exception as e:
        print(f"No more pages to scrape or an error occurred: {e}")
        break  # Exit the loop if no more pages are available or an error occurs


Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=2
Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=3
Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=4
Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=5
Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=6
Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=7
Next button URL: https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=8
Next button URL: https://www.flipk

In [4]:
len(titles)

1161

In [5]:
# Store the scraped data in a DataFrame
data = {
    'Title': titles,
    'Price': prices,
    'Category': categories,
    'Rating': ratings,
    'Number of Reviews': number_of_reviews,
}
df = pd.DataFrame(data)


In [6]:
df

Unnamed: 0,Title,Price,Category,Rating,Number of Reviews
0,Unknown,0,Laptop,Unknown,0
1,Unknown,0,Laptop,Unknown,0
2,CHUWI Intel Celeron Dual Core 11th Gen N4020 -...,"₹16,990",Laptop,3.7,960 Ratings & 107 Reviews
3,CHUWI Intel Core i5 10th Gen 1035G1 - (16 GB/5...,"₹25,990",Laptop,4.1,548 Ratings & 88 Reviews
4,ASUS Chromebook Intel Celeron Dual Core N4500 ...,"₹12,990",Laptop,3.7,"2,007 Ratings & 180 Reviews"
...,...,...,...,...,...
1156,Lenovo Intel Core i3 11th Gen 1115G4 - (4 GB/2...,"₹36,990",Laptop,4.3,226 Ratings & 28 Reviews
1157,HP 14s Intel Core i5 10th Gen 1035G1 - (8 GB/1...,"₹58,990",Laptop,4.4,911 Ratings & 109 Reviews
1158,Unknown,0,Laptop,Unknown,0
1159,Unknown,0,Laptop,3.6,0


In [7]:
df.shape

(1161, 5)

In [8]:
# Save to CSV file
df.to_csv('flipkart_laptops.csv', index=False)

In [9]:
# Close the browser
driver.quit()