### Web Scraping from Flipkart

In [1]:
import os
# The returned working-directory path is neither stored nor printed here.
os.getcwd()
import warnings
# Install a blanket "ignore" filter so no warnings are shown for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import csv
import time
from datetime import datetime

# Set up the Selenium WebDriver (make sure you have the appropriate driver installed).
# NOTE: the chromedriver path is hard-coded to one Windows machine; point it at
# the local chromedriver executable before running elsewhere.
service = Service(r'C:\Users\angel\OneDrive\Desktop\Git Hub\chromedriver-win64\chromedriver.exe')  # Raw string so the backslashes are not read as escape sequences
# Module-level driver instance; the functions below use it as a global.
driver = webdriver.Chrome(service=service)

# Base URL of the page to scrape. The trailing `page={}` is a str.format()
# placeholder that is filled in with the page number for each request.
base_url = 'https://www.flipkart.com/apple-iphone-15-green-128-gb/product-reviews/itm235cd318bde73?pid=MOBGTAGPYYWZRUJX&lid=LSTMOBGTAGPYYWZRUJXUGY7PM&marketplace=FLIPKART&page={}'

# Function to get reviews from a page
def get_reviews_from_page(page_num):
    """Load one review-listing page and return the texts found on it.

    Points the module-level ``driver`` at ``base_url`` formatted with
    ``page_num``, pauses for a fixed three seconds, then collects the
    whitespace-stripped text of every element with class ``ZmyHeo``
    followed by every element with class ``z9E0IG``.

    Args:
        page_num: Value substituted into the ``{}`` placeholder of ``base_url``.

    Returns:
        A list of strings, ``ZmyHeo`` texts first and ``z9E0IG`` texts after;
        empty when neither class matches anything on the page.
    """
    driver.get(base_url.format(page_num))
    time.sleep(3)  # Fixed pause to give the page time to load

    # Look up both groups of elements before reading any text from them.
    primary_nodes = driver.find_elements(By.CLASS_NAME, 'ZmyHeo')
    secondary_nodes = driver.find_elements(By.CLASS_NAME, 'z9E0IG')

    return [node.text.strip() for node in [*primary_nodes, *secondary_nodes]]

# Main scraping function with error handling and direct saving
def scrape_reviews(max_pages=245, filename='reviews.csv'):
    """Scrape pages 1..max_pages and write every review text to a CSV file.

    The output file is (re)created with a single ``Review`` header row, and
    the texts returned by ``get_reviews_from_page`` are appended one per row
    as each page is processed. A failure on one page is printed and followed
    by a longer pause; the loop then moves on to the next page.

    Args:
        max_pages: Highest page number to request (inclusive). Values below 1
            produce a file containing only the header.
        filename: Path of the CSV file to write (UTF-8, overwritten if present).
    """
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        row_writer = csv.writer(csv_file)
        row_writer.writerow(['Review'])  # Header is written exactly once

        for current_page in range(1, max_pages + 1):
            try:
                print(f"[{datetime.now()}] Scraping page {current_page}...")
                page_reviews = get_reviews_from_page(current_page)
                # One single-column row per review; nothing is written for an empty page.
                row_writer.writerows([text] for text in page_reviews)

                time.sleep(2)  # Pause between requests to avoid getting blocked

            except Exception as exc:
                print(f"Error on page {current_page}: {exc}")
                time.sleep(5)  # Back off a bit longer after an error

    print(f"Scraping completed. Reviews saved to {filename}.")

# Execute the scraping and saving.
# The try/finally guarantees the browser is shut down even when the run is
# interrupted (e.g. KeyboardInterrupt) or scrape_reviews raises; without it an
# aborted run skips driver.quit() and leaves the Chrome session running.
try:
    if __name__ == "__main__":
        scrape_reviews(max_pages=245, filename='reviewstest.csv')
finally:
    # Close the WebDriver (runs unconditionally, as before)
    driver.quit()


[2024-08-27 12:45:01.139365] Scraping page 1...
[2024-08-27 12:45:09.190352] Scraping page 2...
[2024-08-27 12:45:17.419075] Scraping page 3...
[2024-08-27 12:45:25.296713] Scraping page 4...
[2024-08-27 12:45:33.164170] Scraping page 5...
[2024-08-27 12:45:42.694104] Scraping page 6...
[2024-08-27 12:45:51.762591] Scraping page 7...
[2024-08-27 12:45:59.666685] Scraping page 8...
[2024-08-27 12:46:08.049850] Scraping page 9...
[2024-08-27 12:46:18.111319] Scraping page 10...
[2024-08-27 12:46:26.048539] Scraping page 11...
[2024-08-27 12:46:34.898825] Scraping page 12...
[2024-08-27 12:46:42.377144] Scraping page 13...
[2024-08-27 12:46:49.840208] Scraping page 14...
[2024-08-27 12:46:57.402284] Scraping page 15...
[2024-08-27 12:47:04.407574] Scraping page 16...
[2024-08-27 12:47:11.956521] Scraping page 17...
[2024-08-27 12:47:19.743196] Scraping page 18...
[2024-08-27 12:47:28.129870] Scraping page 19...
[2024-08-27 12:47:36.007598] Scraping page 20...
[2024-08-27 12:47:43.844539] 