In [3]:
import time
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import logging

# Basic logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\

# Set up Selenium WebDriver options
options = Options()
options.headless = True
driver = webdriver.Chrome(options=options)

# Define the link and navigate
link = 'https://www.myntra.com/reviews/10339033'
driver.get(link)
logging.info("Navigated to the page")

# Wait for the initial reviews to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'user-review-userReviewWrapper')))
logging.info("Initial reviews loaded")

# Function to perform slow scrolling
def slow_scroll(scrolls=1900, increment=200):
    for i in range(scrolls):  # Ensure 'i' is defined here as the loop variable
        # Scroll down by a small increment and wait a random amount of time
        driver.execute_script(f"window.scrollBy(0, {increment});")
        time.sleep(random.uniform(0.5, 1.5))
        if i % 100 == 0:  # Log progress every 100 scrolls
            logging.info(f"Scrolled {i + 1} times")

        # Check if new reviews have loaded, by looking for the loading element or any other indicator
        # This step may need to be adjusted based on the website's behavior.

# Perform the slow scrolling
slow_scroll()

# Now extract the reviews
reviews_data = []
review_elements = driver.find_elements(By.CLASS_NAME, 'user-review-userReviewWrapper')

# Correct enumeration of review_elements to define 'index' and 'data'
for index, data in enumerate(review_elements):
    review_dict = {}

    # EXTRACTING rating from reviews
    rating_element = data.find_element(By.CSS_SELECTOR, '[class^="user-review-starRating"]')
    review_dict['Rating'] = rating_element.get_attribute('class').split(' ')[-1] if rating_element else '0'

    # EXTRACTING review text from reviews
    review_text_element = data.find_element(By.CLASS_NAME, 'user-review-reviewTextWrapper')
    review_dict['Review Text'] = review_text_element.text if review_text_element else 'No review text'

    # EXTRACTING likes and dislikes on review
    # Replace 'user-review-thumb' with the actual class name that contains the likes/dislikes counts
    votes_elements = data.find_elements(By.CSS_SELECTOR, '.user-review-votes .user-review-thumb')
    review_dict['Likes'] = votes_elements[0].text if votes_elements else '0'
    review_dict['Dislikes'] = votes_elements[1].text if len(votes_elements) > 1 else '0'
    
    # EXTRACTING name of reviewer and date
    reviewer_info = data.find_elements(By.CSS_SELECTOR, '.user-review-left span')
    review_dict['Name'] = reviewer_info[0].text if reviewer_info else 'Anonymous'
    review_dict['Date'] = reviewer_info[1].text if len(reviewer_info) > 1 else 'No date'
    
    # EXTRACTING number of images on review
    review_images_elements = data.find_elements(By.CLASS_NAME, 'image-thumb-wrapper-container')
    review_dict['Images'] = str(len(review_images_elements))
    
    # Log progress for every 100th review processed
    if index % 100 == 0:
        logging.info(f"Processing review {index + 1}/{len(review_elements)}")
    
    reviews_data.append(review_dict)

# Quit the driver
driver.quit()

# Create DataFrame from the list of dictionaries
dataframe = pd.DataFrame(reviews_data)
dataframe = dataframe[['Name', 'Date', 'Rating', 'Images', 'Likes', 'Dislikes', 'Review Text']]

# Display the DataFrame
print(dataframe)

2024-03-09 17:18:25,771 - INFO - Navigated to the page
2024-03-09 17:18:25,786 - INFO - Initial reviews loaded
2024-03-09 17:18:27,059 - INFO - Scrolled 1 times
2024-03-09 17:20:11,433 - INFO - Scrolled 101 times
2024-03-09 17:21:58,900 - INFO - Scrolled 201 times
2024-03-09 17:23:44,962 - INFO - Scrolled 301 times
2024-03-09 17:25:37,095 - INFO - Scrolled 401 times
2024-03-09 17:27:26,276 - INFO - Scrolled 501 times
2024-03-09 17:29:22,474 - INFO - Scrolled 601 times
2024-03-09 17:31:21,872 - INFO - Scrolled 701 times
2024-03-09 17:33:24,220 - INFO - Scrolled 801 times
2024-03-09 17:35:25,220 - INFO - Scrolled 901 times
2024-03-09 17:37:31,573 - INFO - Scrolled 1001 times
2024-03-09 17:39:33,491 - INFO - Scrolled 1101 times
2024-03-09 17:41:36,877 - INFO - Scrolled 1201 times
2024-03-09 17:43:42,843 - INFO - Scrolled 1301 times
2024-03-09 17:45:52,630 - INFO - Scrolled 1401 times
2024-03-09 17:47:44,957 - INFO - Scrolled 1501 times
2024-03-09 17:49:35,623 - INFO - Scrolled 1601 times


                    Name          Date                 Rating Images Likes  \
0                   Ajay    9 Mar 2023  user-review-fourStars      1    34   
1              Tilottoma   30 Jan 2023  user-review-fiveStars      0    38   
2          Mohammad Zaid   25 Dec 2021  user-review-fourStars      1   215   
3     Suraj Kumar Thakur  16 Sept 2022  user-review-fiveStars      1    33   
4          Akash Kashyap   14 Aug 2022  user-review-fourStars      1   171   
...                  ...           ...                    ...    ...   ...   
2308       Vishal Mishra   7 July 2020    user-review-oneStar      0     0   
2309       Akshay Bawkar  21 July 2021    user-review-oneStar      0     0   
2310       Prajas Khanna   26 Feb 2022    user-review-oneStar      0     0   
2311           Ram Kumar   21 Feb 2022    user-review-oneStar      0     0   
2312                Akhi   19 Jan 2021    user-review-oneStar      0     0   

     Dislikes                                        Review Tex

In [4]:
dataframe

Unnamed: 0,Name,Date,Rating,Images,Likes,Dislikes,Review Text
0,Ajay,9 Mar 2023,user-review-fourStars,1,34,15,"Quality of shoes is good at this price , its c..."
1,Tilottoma,30 Jan 2023,user-review-fiveStars,0,38,14,If you are looking for shoe without opening th...
2,Mohammad Zaid,25 Dec 2021,user-review-fourStars,1,215,58,I got this shoe very recently during the last ...
3,Suraj Kumar Thakur,16 Sept 2022,user-review-fiveStars,1,33,14,"Just bought this one, and the product is soo p..."
4,Akash Kashyap,14 Aug 2022,user-review-fourStars,1,171,55,Buy 1 size small as your actual size. Overall ...
...,...,...,...,...,...,...,...
2308,Vishal Mishra,7 July 2020,user-review-oneStar,0,0,3,Not that bad
2309,Akshay Bawkar,21 July 2021,user-review-oneStar,0,0,0,Miss match with size
2310,Prajas Khanna,26 Feb 2022,user-review-oneStar,0,0,0,Shoes damage
2311,Ram Kumar,21 Feb 2022,user-review-oneStar,0,0,0,Bad quality


In [5]:
dataframe.to_excel('GroupAssignment_Final.xlsx')

In [6]:
dataframe.info

<bound method DataFrame.info of                     Name          Date                 Rating Images Likes  \
0                   Ajay    9 Mar 2023  user-review-fourStars      1    34   
1              Tilottoma   30 Jan 2023  user-review-fiveStars      0    38   
2          Mohammad Zaid   25 Dec 2021  user-review-fourStars      1   215   
3     Suraj Kumar Thakur  16 Sept 2022  user-review-fiveStars      1    33   
4          Akash Kashyap   14 Aug 2022  user-review-fourStars      1   171   
...                  ...           ...                    ...    ...   ...   
2308       Vishal Mishra   7 July 2020    user-review-oneStar      0     0   
2309       Akshay Bawkar  21 July 2021    user-review-oneStar      0     0   
2310       Prajas Khanna   26 Feb 2022    user-review-oneStar      0     0   
2311           Ram Kumar   21 Feb 2022    user-review-oneStar      0     0   
2312                Akhi   19 Jan 2021    user-review-oneStar      0     0   

     Dislikes                  