In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import json
import time

# Function to extract phone details from a single page
def extract_phone_details(driver):
    """Collect one record per product card on the page currently loaded in *driver*.

    Args:
        driver: A Selenium WebDriver that has already navigated to a
            search-results page.

    Returns:
        A list of dicts with the keys ``image_url``, ``name``,
        ``average_ratings``, ``total_ratings``, ``total_reviews`` and
        ``discounted_price``. All values are the strings read from the page.
        A card for which any lookup fails is reported on stdout and left out
        of the result.
    """
    # Locators; everything except the card locator is relative to one card.
    # The class names are site-generated and may need adjusting.
    card_xpath = '//div[contains(@class, "tUxRFH")]'
    name_xpath = './/div[contains(@class, "KzDlHZ")]'
    counts_xpath = './/span[contains(@class, "Wphh3N")]'
    price_xpath = './/div[contains(@class, "Nx9bqj") and contains(@class, "_4b5DiR")]'

    def leading_token(element):
        # Keep only the first whitespace-separated token of the element text.
        return element.text.split()[0]

    records = []
    for card in driver.find_elements(By.XPATH, card_xpath):
        try:
            image_url = card.find_element(By.TAG_NAME, 'img').get_attribute('src')
            name = card.find_element(By.XPATH, name_xpath).text

            # Ratings and reviews live in one container: the first inner span
            # is read for the ratings count, the third for the reviews count.
            counts = card.find_element(By.XPATH, counts_xpath)
            total_ratings = leading_token(counts.find_element(By.XPATH, './/span[1]'))
            total_reviews = leading_token(counts.find_element(By.XPATH, './/span[3]'))
            average_ratings = card.find_element(By.CLASS_NAME, 'XQDdHH').text

            discounted_price = card.find_element(By.XPATH, price_xpath).text
            # The actual-price lookup is disabled in this version:
            # actual_price = card.find_element(By.XPATH, './/div[contains(@class, "yRaY8j") and contains(@class, "ZYYwLA")]').text
        except Exception as e:
            # Best effort: one incomplete card must not abort the whole page.
            print(f"Error extracting details for a product: {e}")
            continue

        records.append({
            'image_url': image_url,
            'name': name,
            'average_ratings': average_ratings,
            'total_ratings': total_ratings,
            'total_reviews': total_reviews,
            'discounted_price': discounted_price,
        })

    return records

# Set up the Chrome WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Initialize a list to store all phones
all_phones = []

# Base URL for Flipkart search results
base_url = 'https://www.flipkart.com/search?q=smartphones'

# Single definition of the output path, so the file that is written and the
# file named in the final message cannot drift apart.
output_path = 'flipkart_phones_without_actual_price.json'

try:
    # Loop through the pagination
    for page in range(1, 42):  # Adjust the range as needed for more pages
        # Navigate to the page
        driver.get(f"{base_url}&page={page}")

        # Allow some time for the page to load
        time.sleep(3)  # Fixed pause; the page is not checked for readiness

        # Extract phone details from the current page
        phones_on_page = extract_phone_details(driver)
        all_phones.extend(phones_on_page)  # Add the current page's phones to the list
finally:
    # Close the browser even if a page load or extraction raises, so a failed
    # run does not leave a Chrome process behind.
    driver.quit()

# Save the data to a JSON file
with open(output_path, 'w', encoding='utf-8') as json_file:
    json.dump(all_phones, json_file, indent=4)

print(f"Extracted data for {len(all_phones)} phones and saved to '{output_path}'.")


In [None]:
import pandas as pd
# Load the JSON file written by the scraping cell into a DataFrame.
phones = pd.read_json('flipkart_phones_without_actual_price.json')
# Preview the first few rows to check that the load worked.
phones.head()

In [None]:

# Print a summary of the DataFrame (columns, non-null counts, dtypes).
phones.info()

In [None]:
# List the average ratings from highest to lowest.
# NOTE(review): assumes the column was parsed as numeric; confirm with phones.info().
phones['average_ratings'].sort_values(ascending=False)

In [None]:
# Name and discounted price of every phone whose average rating is exactly 4.7,
# selected in a single .loc step (row mask, column list).
phones.loc[phones['average_ratings'] == 4.7, ['name', 'discounted_price']]