In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape one Flipkart search-results page and save each product's name and
# price to flipkart_mobiles1.csv.

# Search query, already percent-encoded so it can be appended to the URL as-is
searchString = "oppo%20mobile"
url = 'https://www.flipkart.com/search?q=' + searchString

# Headers to simulate a request from a browser
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9'
}

# Upper bound (seconds) on connecting and on each read. requests applies no
# timeout by default, so without this a stalled connection would hang the cell.
REQUEST_TIMEOUT_SECONDS = 30

# Send a GET request to the URL
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

# Check if the request was successful
if response.status_code != 200:
    raise Exception(f"Failed to load page {url}. Status code: {response.status_code}")

# Parse the HTML content of the page
soup = BeautifulSoup(response.content, 'html.parser')

# Lists to store the extracted data (kept in lockstep: one entry per product)
model_names = []
prices = []

# Find all the containers with the relevant data.
# NOTE(review): these class names look auto-generated by the site and are
# likely to change; the emptiness check below is what detects that.
containers = soup.find_all('div', class_='cPHDOP col-12-12')

for container in containers:
    model_name = container.find('div', class_='KzDlHZ')
    price = container.find('div', class_='Nx9bqj _4b5DiR')

    # find() returns None (it does not raise) when nothing matches, so
    # containers lacking either field are skipped to keep the lists aligned.
    if model_name is None or price is None:
        continue

    model_names.append(model_name.text)
    prices.append(price.text)

# Check if we have extracted any data
if not model_names or not prices:
    raise Exception("Failed to extract data. Check if the page structure has changed.")

# Create a DataFrame from the extracted data
data = pd.DataFrame({
    'Model Name': model_names,
    'Price': prices
})

# Save the DataFrame to a CSV file
data.to_csv('flipkart_mobiles1.csv', index=False)

print("Data has been successfully saved to flipkart_mobiles1.csv")


Data has been successfully saved to flipkart_mobiles1.csv


In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
import time

# Scrape the first page of Reliance Digital search results with headless
# Chrome and save each product's name and price to reliance_selenium.csv.

# Configure Selenium WebDriver
options = Options()
options.add_argument('--headless')  # Run in headless mode
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# URL of the Reliance Digital search page
url = 'https://www.reliancedigital.in/search?q=oppo'

# Lists to store the extracted data (kept in lockstep: one entry per product)
model_names = []
prices = []

# Everything that uses the browser runs inside try/finally so the Chrome
# process is always shut down, even if loading or parsing raises.
try:
    driver.get(url)

    # Wait for content to load
    time.sleep(10)  # Wait for 10 seconds or adjust as needed

    # Dump the rendered page source to disk for debugging selector issues
    page_source = driver.page_source
    with open('page_source.html', 'w', encoding='utf-8') as file:
        file.write(page_source)

    # Loop over the first 24 containers
    for i in range(1, 25):  # Adjust the range if needed
        try:
            # Find the list item holding the i-th product
            container_xpath = f'//*[@id="pl"]/div[2]/ul/li[{i}]'
            container = driver.find_element(By.XPATH, container_xpath)

            # Child lookups use "./"-relative XPaths: an XPath starting with
            # "//" searches the whole document even when called on an
            # element, which would ignore `container` entirely.
            model_name_tag = container.find_element(By.XPATH, './div/a/div/div[2]/p')
            price_tag = container.find_element(
                By.XPATH, './div/a/div/div[2]/div[1]/div/div/span/span[2]'
            )

            # find_element raises when nothing matches, so reaching this point
            # means both fields exist; append them together.
            model_names.append(model_name_tag.text.strip())
            prices.append(price_tag.text.strip())
        except Exception as e:
            # Best effort: report the failing item and move on to the next one
            print(f"Exception occurred while parsing container {i}: {e}")
            continue
finally:
    # Close the WebDriver
    driver.quit()

# Debugging: Print extracted data
print("Extracted model names:")
print(model_names)
print("Extracted prices:")
print(prices)

# Check if we have extracted any data
if not model_names or not prices:
    raise Exception("No data extracted. Please check the HTML structure and selectors.")

# Create a DataFrame from the extracted data
data = pd.DataFrame({
    'Model Name': model_names,
    'Price': prices
})

# Save the DataFrame to a CSV file; a failure is reported rather than raised
try:
    data.to_csv('reliance_selenium.csv', index=False)
    print("Data has been successfully saved to reliance_selenium.csv")
except Exception as e:
    print(f"Error saving to CSV file: {e}")


Extracted model names:
['Oppo 10000 mAh 18 Watts Fast Charging Power Bank 2 with Type-C and Micro USB Dual Connection (Black)', 'Oppo A15 32 GB, 3 GB RAM, Mystery Blue, Mobile Phone (JioExclusive Offer Available)', 'OPPO PBV02 10000 mAh VOOC Power Bank with 30 Watts Reverse Flash Charging (White)', 'Oppo A3 Pro 5G 256 GB, 8 GB RAM, Moonlight Purple, Mobile Phone', 'Oppo F27 Pro Plus 5G 128 MB, 8 GB RAM, Dusk Pink, Mobile Phone', 'Oppo A3 Pro 5G 128 GB, 8 GB RAM, Moonlight Purple, Mobile Phone', 'Oppo F27 Pro Plus 5G 128 MB, 8 GB RAM, Midnight Navy, Mobile Phone', 'Oppo F25 Pro 5G 128 GB, 8 GB RAM, Mobile Phone, Ocean Blue', 'Oppo F25 Pro 5G 128 GB, 8 GB RAM, Mobile Phone, Lava Red', 'Oppo F27 Pro Plus 5G 256 MB, 8 GB RAM, Dusk Pink, Mobile Phone', 'Oppo A59 5G 128 GB, 4GB RAM, Silk Gold, Mobile Phone', 'Oppo F27 Pro Plus 5G 256 MB, 8 GB RAM, Midnight Navy, Mobile Phone', 'Oppo A59 5G 128 GB, 6 GB RAM, Silk Gold, Mobile Phone', 'Oppo A59 5G 128 GB, 6 GB RAM, Starry Black, Mobile Phone',