In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

# Web driver
driver = webdriver.Chrome()

def scrape_reviews(url, max_page=5):
    reviews = []

    for page in range(1, max_page + 1):
        print("Page:", page)
        page_url = f'{url}{page}?ie=UTF8&reviewerType=all_reviews&pageNumber={page}'
        driver.get(page_url)

        review_elements = driver.find_elements(By.XPATH, "//div[@data-hook='review']")
        if review_elements:
            for review in review_elements:
                review_data_dict = {}
                review_data_dict['reviewer_name'] = review.find_element(By.XPATH, ".//span[@class='a-profile-name']").text
                review_data_dict['review_date'] = review.find_element(By.XPATH, ".//span[@data-hook='review-date']").text
              
                review_data_dict['rating'] = review.find_element(By.XPATH, ".//i[contains(@class, 'review-rating')]/span").get_attribute('innerText')
                
                # review_data_dict['rating'] = "Rating not found"
                review_data_dict['review_text'] = review.find_element(By.XPATH, ".//span[@data-hook='review-body']").text
                reviews.append(review_data_dict)
        else:
            print(f'NO Reviews Found on Page {page}')
            break

        # Clicking on the next page button
        try:
            next_button = driver.find_element(By.XPATH, "//li[@class='a-last']//a")
            next_button.click()
            WebDriverWait(driver, 10).until(EC.url_changes(page_url))
        except NoSuchElementException:
            print("Next Page not found")
            break

    return reviews

web_page_url = "https://www.amazon.in/Apple-iPhone-13-128GB-Blue/product-reviews/B09G9BL5CP/ref=cm_cr_arp_d_paging_btm_next_"


import re

full_link = "https://www.amazon.in/Apple-iPhone-13-128GB-Blue/product-reviews/B09G9BL5CP/ref=cm_cr_arp_d_paging_btm_next_1?ie=UTF8&reviewerType=all_reviews&pageNumber=1"

# Define a regular expression pattern to extract the desired part of the link
pattern = r'(https://www.amazon.in/[^/]+/product-reviews/[^/]+/ref=cm_cr_arp_d_paging_btm_next_)'

# Use re.search to find the pattern in the link
match = re.search(pattern, full_link)

if match:
    extracted_part = match.group(1)
    print(extracted_part)
else:
    print("Pattern not found in the link.")

amazon_reviews = scrape_reviews(web_page_url)

print(amazon_reviews)

driver.close()


https://www.amazon.in/Apple-iPhone-13-128GB-Blue/product-reviews/B09G9BL5CP/ref=cm_cr_arp_d_paging_btm_next_
Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
[{'reviewer_name': 'Jnan Shetty', 'review_date': 'Reviewed in India on 30 December 2023', 'rating': '5.0 out of 5 stars', 'review_text': 'Play Video\nThis is my first iPhone and it always feels good to hold this masterpiece. This looks premium not just because of its branding, it has everything which makes it feel premium. However, despite all the looks and finish, the transition from Android to Apple is not very smooth and you may constantly lack certain features that can make life easy. I was using vivo y19 Lite before this and still have that phone with me. Putting out the feature that is being missed by me-\n1. Forwarding Anything in Whatsapp: Forwarding anything in Whatsapp from IPhone takes bit more step as compared to what is being offered in Android. This makes the entire process slow for an Android User\n2. App Switcher: Apple cou

In [3]:
# amazon_reviews

[{'reviewer_name': 'Pankaj Kumar',
  'review_date': 'Reviewed in India on 24 February 2024',
  'rating': '5.0 out of 5 stars',
  'review_text': 'The iPhone 13 128GB has surpassed my expectations in every aspect, setting a new standard for smartphone excellence. From its sleek design to its powerful performance and innovative features, this device has truly redefined what it means to own a premium smartphone.\n\nDesign and Build Quality:\nThe iPhone 13’s design is a masterpiece of craftsmanship, featuring a sleek aluminum frame and a stunning ceramic shield front cover. The device feels incredibly premium in hand, with its compact form factor and exquisite attention to detail. The IP68 water and dust resistance provide added durability and peace of mind, making it perfect for everyday use.\n\nDisplay:\nThe Super Retina XDR display of the iPhone 13 is nothing short of breathtaking. The 6.1-inch OLED screen delivers vibrant colors, deep blacks, and excellent brightness levels, ensuring an

In [3]:
import pandas as pd

df = pd.DataFrame(amazon_reviews)

df

Unnamed: 0,reviewer_name,review_date,rating,review_text
0,Jnan Shetty,Reviewed in India on 30 December 2023,5.0 out of 5 stars,Play Video\nThis is my first iPhone and it alw...
1,Amazing performance. A15 Bio chip prosessor is...,Reviewed in India on 16 April 2024,5.0 out of 5 stars,The camera quality is just 🔥.\nThe battery is ...
2,Keshav Maheshwari,Reviewed in India on 4 February 2023,4.0 out of 5 stars,This is my first iPhone and it always feels go...
3,Pawan Sasane,Reviewed in India on 8 May 2024,5.0 out of 5 stars,Great Purchase! iPhone 13 is awesome phone .. ...
4,Happy,Reviewed in India on 22 March 2024,5.0 out of 5 stars,The big difference between ios and android is ...
5,GS Patel,Reviewed in India on 22 April 2024,5.0 out of 5 stars,Camera is excellent.
6,Raj Halder,Reviewed in India on 8 May 2024,5.0 out of 5 stars,
7,Maruf,Reviewed in India on 9 April 2024,5.0 out of 5 stars,Perfect
8,Shreyank,Reviewed in India on 28 April 2024,4.0 out of 5 stars,Not value for money
9,Soumyojit S.,Reviewed in India on 9 May 2024,5.0 out of 5 stars,You don't need to explain how good apple produ...


In [6]:
# df.to_csv('amazon_reviews.csv', index=False)

In [5]:
# for review in df['review_text']:
#     print(review)

The iPhone 13 128GB has surpassed my expectations in every aspect, setting a new standard for smartphone excellence. From its sleek design to its powerful performance and innovative features, this device has truly redefined what it means to own a premium smartphone.

Design and Build Quality:
The iPhone 13’s design is a masterpiece of craftsmanship, featuring a sleek aluminum frame and a stunning ceramic shield front cover. The device feels incredibly premium in hand, with its compact form factor and exquisite attention to detail. The IP68 water and dust resistance provide added durability and peace of mind, making it perfect for everyday use.

Display:
The Super Retina XDR display of the iPhone 13 is nothing short of breathtaking. The 6.1-inch OLED screen delivers vibrant colors, deep blacks, and excellent brightness levels, ensuring an immersive viewing experience for everything from streaming videos to browsing the web. The True Tone technology adapts the display to ambient lighting

In [22]:
%%writefile sentiment_analysis.py
# Importing necessary modules
import streamlit as st 
import google.generativeai as genai
import google.ai.generativelanguage as glm
import pandas as pd
# scraping start 
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
import re

# Web driver
# chrome_options = Options()
# chrome_options.add_argument("--headless")


def scrape_reviews(url, max_page=5):
    driver = webdriver.Chrome()
    reviews = []

    for page in range(1, max_page + 1):
        print("Page:", page)
        page_url = f'{url}{page}?ie=UTF8&reviewerType=all_reviews&pageNumber={page}'
        driver.get(page_url)

        review_elements = driver.find_elements(By.XPATH, "//div[@data-hook='review']")
        if review_elements:
            for review in review_elements:
                review_data_dict = {}
                review_data_dict['reviewer_name'] = review.find_element(By.XPATH, ".//span[@class='a-profile-name']").text
                review_data_dict['review_date'] = review.find_element(By.XPATH, ".//span[@data-hook='review-date']").text
              
                review_data_dict['rating'] = review.find_element(By.XPATH, ".//i[contains(@class, 'review-rating')]/span").get_attribute('innerText')
                
                # review_data_dict['rating'] = "Rating not found"
                review_data_dict['review_text'] = review.find_element(By.XPATH, ".//span[@data-hook='review-body']").text
                reviews.append(review_data_dict)
        else:
            print(f'NO Reviews Found on Page {page}')
            break

        # Clicking on the next page button
        try:
            next_button = driver.find_element(By.XPATH, "//li[@class='a-last']//a")
            next_button.click()
            WebDriverWait(driver, 10).until(EC.url_changes(page_url))
        except NoSuchElementException:
            print("Next Page not found")
            break
        
    driver.close()

    return reviews

# web_page_url = "https://www.amazon.in/Apple-iPhone-13-128GB-Blue/product-reviews/B09G9BL5CP/ref=cm_cr_arp_d_paging_btm_next_"



# scraping ends here


genai.configure(api_key="AIzaSyC_L3-d181ibSultwSEuGm6P4XwE8HIsEQ")

if __name__=='__main__':

    st.header("Sentiment Analysis of Reviews")
    # data = st.sidebar.file_uploader("Upload Data File Here", type=['csv'])

    full_link = st.sidebar.text_input("Enter the link Here")
    btn =  st.sidebar.button("start")


    if btn:

        # full_link = "https://www.amazon.in/Apple-iPhone-13-128GB-Blue/product-reviews/B09G9BL5CP/ref=cm_cr_arp_d_paging_btm_next_1?ie=UTF8&reviewerType=all_reviews&pageNumber=1"

        # Define a regular expression pattern to extract the desired part of the link
        pattern = r'(https://www.amazon.in/[^/]+/product-reviews/[^/]+/ref=cm_cr_arp_d_paging_btm_next_)'

        # Use re.search to find the pattern in the link
        match = re.search(pattern, full_link)

        if match:
            extracted_part = match.group(1)
            print(extracted_part)
            amazon_reviews = scrape_reviews(extracted_part)
        else:
            print("Pattern not found in the link.")

        # amazon_reviews = scrape_reviews(web_page_url)

        # print(amazon_reviews)



        data_file = pd.DataFrame(amazon_reviews)
        data_file['date'] = data_file['review_date'].str.extract(r'on (\d+ \w+ \d{4})')
        data_file['date'] = pd.to_datetime(data_file['date'], format='%d %B %Y')
        if data_file is not None:
            # data_file  = pd.read_csv(data)
            # data_file = pd.DataFrame(amazon_reviews)
            Sentiment = []
            for reviews in data_file['review_text']:
             
                # model selection
                model = genai.GenerativeModel('gemini-pro')

                prompt = """ Give the Sentiment analysis of given review only in three words either **POSITIVE**👍🏻 or **NEGATIVE** 👎🏻 
                consider one more condition if the review is to larger consider it **spam** """

                response = model.generate_content([prompt, reviews])
                #  st.write(reviews)
                #  st.write(response.text)
                Sentiment.append(response.text)
                #  st.write("===============================================================================")

            data_file['Sentiment'] = Sentiment
            data_file.drop(columns=['review_date'], inplace=True)
            data_file = data_file.sort_values(by='date', ascending=False).reset_index(drop=True)
            st.write(data_file)



    

Overwriting sentiment_analysis.py


In [11]:
import pandas as pd
# scraping start 
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
import re


def scrape_reviews(url, max_page=5):
    driver = webdriver.Chrome()
    reviews = []

    for page in range(1, max_page + 1):
        # print("Page:", page)
        page_url = f'{url+str(page)}'
        # uuu = 'https://www.flipkart.com/apple-iphone-13-midnight-128-gb/product-reviews/itmca361aab1c5b0?pid=MOBG6VF5Q82T3XRS&lid=LSTMOBG6VF5Q82T3XRSOXJLM9&aid=overall&certifiedBuyer=false&sortOrder=MOST_RECENT&page=1'
        driver.get(page_url)
        global review_elements
        review_elements = driver.find_elements(By.XPATH, '//div[@class="DOjaWF gdgoEp col-9-12"]//div[@class="cPHDOP col-12-12"]') # //*[@id="container"]/div/div[3]/div/div[1]/div[2]
        
        # print(review_elements)
        if review_elements:
            for i,review in enumerate(review_elements):
                # print(review)
                if i>2 and i<13:
                    reviews.append(review.text)
                    #for j in review.text:
                        #print(j, end='')
                    

    #             review_data_dict = {}
    #             review_data_dict['reviewer_name'] = review.find_element(By.XPATH, ".//p[@class='_2NsDsF AwS1CA']").text
    #             review_data_dict['review_date'] = review.find_element(By.XPATH, ".//p[@class='_2NsDsF']").text
              
    #             review_data_dict['rating'] = review.find_element(By.XPATH, ".//div[@class='XQDdHH Ga3i8K']").text
                
    # # #             # review_data_dict['rating'] = "Rating not found"
    #             review_data_dict['review_text'] = review.find_element(By.XPATH, ".//div[@class='ZmyHeo']").text
    #             reviews.append(review_data_dict)
    #             print(review_data_dict)
        # else:
        #     print(f'NO Reviews Found on Page {page}')
        #     break

    #     # Clicking on the next page button
    #     try:
    #         next_button = driver.find_element(By.XPATH, "//li[@class='a-last']//a")
    #         next_button.click()
    #         WebDriverWait(driver, 10).until(EC.url_changes(page_url))
    #     except NoSuchElementException:
    #         print("Next Page not found")
    #         break
        
    driver.close()

    return reviews

uuu = 'https://www.flipkart.com/apple-iphone-13-midnight-128-gb/product-reviews/itmca361aab1c5b0?pid=MOBG6VF5Q82T3XRS&lid=LSTMOBG6VF5Q82T3XRSOXJLM9&aid=overall&certifiedBuyer=false&sortOrder=MOST_RECENT&page='

abc = scrape_reviews(uuu)
print(abc, end=(''))
# my_str = ''.join(abc)

# print(my_str)

    

['5\nPerfect product!\nall about good thank you\nrohit sharma\nCertified Buyer, Yamunanagar\n4 days ago\n10', '5\nGreat product\nAwesome just Wow\nMadhu Khan\nCertified Buyer, Murshidabad District\n5 days ago\n00', '5\nFabulous!\nReally appreciate of the camera\nKaisor Khan\nCertified Buyer, Bengaluru\n6 days ago\n00', '5\nMind-blowing purchase\nGreat camera. High end performance. Amazing design.\nLoving it.\nSumana Pati\nCertified Buyer, Puruliya District\n6 days ago\n00', '4\nWorth the money\nVery nice product\nManju saji\nCertified Buyer, Thiruvananthapuram District\n7 days ago\n00', '5\nClassy product\nAwesome 😎😎 Thanks Flipkart\nFlipkart Customer\nCertified Buyer, Faridabad\n8 days ago\n00', '5\nWonderful\nexcellent product..Thankss Flipkart\nVishal Lal\nCertified Buyer, Kanpur\n8 days ago\n00', '4\nGood quality product\nThe colour is good. Camera and display are also good\nSimpu Pattanayak\nCertified Buyer, Sipcot Industrial Park Sriprumbudur\n8 days ago\n00', '5\nSimply awesome\

In [14]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_reviews(url, max_page=5):
    driver = webdriver.Chrome()
    reviews = []

    for page in range(1, max_page + 1):
        page_url = f'{url+str(page)}'
        driver.get(page_url)
        review_elements = driver.find_elements(By.XPATH, '//div[@class="DOjaWF gdgoEp col-9-12"]//div[@class="cPHDOP col-12-12"]')

        if review_elements:
            for i, review in enumerate(review_elements):
                if i > 2 and i < 13:
                    reviews.append(review.text)

    driver.close()

    return reviews

url = 'https://www.flipkart.com/apple-iphone-13-midnight-128-gb/product-reviews/itmca361aab1c5b0?pid=MOBG6VF5Q82T3XRS&lid=LSTMOBG6VF5Q82T3XRSOXJLM9&aid=overall&certifiedBuyer=false&sortOrder=MOST_RECENT&page='
reviews = scrape_reviews(url)
print(reviews,end=" ")


['5\nPerfect product!\nall about good thank you\nrohit sharma\nCertified Buyer, Yamunanagar\n4 days ago\n10', '5\nGreat product\nAwesome just Wow\nMadhu Khan\nCertified Buyer, Murshidabad District\n5 days ago\n00', '5\nFabulous!\nReally appreciate of the camera\nKaisor Khan\nCertified Buyer, Bengaluru\n6 days ago\n00', '5\nMind-blowing purchase\nGreat camera. High end performance. Amazing design.\nLoving it.\nSumana Pati\nCertified Buyer, Puruliya District\n6 days ago\n00', '4\nWorth the money\nVery nice product\nManju saji\nCertified Buyer, Thiruvananthapuram District\n7 days ago\n00', '5\nClassy product\nAwesome 😎😎 Thanks Flipkart\nFlipkart Customer\nCertified Buyer, Faridabad\n8 days ago\n00', '5\nWonderful\nexcellent product..Thankss Flipkart\nVishal Lal\nCertified Buyer, Kanpur\n8 days ago\n00', '4\nGood quality product\nThe colour is good. Camera and display are also good\nSimpu Pattanayak\nCertified Buyer, Sipcot Industrial Park Sriprumbudur\n8 days ago\n00', '5\nSimply awesome\

In [2]:
import pandas as pd

# Given list


# Splitting each element of the list
data = [elem.split('\n') for elem in abc]

# Creating DataFrame
df = pd.DataFrame(data, columns=['rating', 'tag line', 'review', 'reviewer name', 'address', 'time', 'unknown'])

# Dropping the last column
df.drop(columns=['unknown'], inplace=True)

# Displaying DataFrame
print(df)


ValueError: 7 columns passed, passed data had 13 columns

In [4]:
import pandas as pd

# list1 = ['5\nPerfect product!\nall about good thank you\nrohit sharma\nCertified Buyer, Yamunanagar\n4 days ago\n10', 
#          '5\nGreat product\nAwesome just Wow\nMadhu Khan\nCertified Buyer, Murshidabad District\n5 days ago\n00', 
#          '5\nFabulous!\nReally appreciate of the camera\nKaisor Khan\nCertified Buyer, Bengaluru\n5 days ago\n00', 
#          'Page 1 of 1,366\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\nNEXT', 
#          '5\nPerfect product!\nall about good thank you\nrohit sharma\nCertified Buyer, Yamunanagar\n4 days ago\n10']

# Initialize empty lists to store data
ratings = []
tag_lines = []
reviews = []
reviewer_names = []
addresses = []
times = []

# Loop through list elements
for item in abc:
    if "Page" in item:  # Skip irrelevant line
        continue
    else:
        # Split the item by newline character and extract relevant information
        parts = item.split("\n")
        ratings.append(parts[0])  # Rating
        tag_lines.append(parts[1])  # Tag line
        reviews.append(parts[2])  # Review
        reviewer_names.append(parts[3])  # Reviewer name
        address_time = parts[4].split(", ")  # Address and time
        addresses.append(address_time[1])  # Address
        times.append(parts[5])  # Time

# Create DataFrame
df = pd.DataFrame({
    'rating': ratings,
    'tag line': tag_lines,
    'review': reviews,
    'reviewer name': reviewer_names,
    'address': addresses,
    'time': times
})

print(df)


IndexError: list index out of range

In [6]:
# Loop through list elements
for item in abc:
    if "Page" in item:  # Skip irrelevant line
        continue
    else:
        # Split the item by newline character and extract relevant information
        parts = item.split("\n")
        ratings.append(parts[0])  # Rating
        tag_lines.append(parts[1])  # Tag line
        # Check if there are enough parts to extract review
        if len(parts) >= 4:
            reviews.append('\n'.join(parts[2:-2]))  # Review
        else:
            reviews.append("")  # Append empty string if review not found
        # Check if there are enough parts to extract reviewer name and address
        if len(parts) >= 6:
            reviewer_names.append(parts[-2])  # Reviewer name
            address_time = parts[-1].split(", ")  # Address and time
            # Check if there are enough parts to extract address and time
            if len(address_time) >= 2:
                addresses.append(address_time[1])  # Address
                times.append(address_time[0])  # Time
            else:
                addresses.append("")  # Append empty string if address not found
                times.append("")  # Append empty string if time not found
        else:
            reviewer_names.append("")  # Append empty string if reviewer name not found
            addresses.append("")  # Append empty string if address not found
            times.append("")  # Append empty string if time not found

# Create DataFrame
df = pd.DataFrame({
    'rating': ratings,
    'tag line': tag_lines,
    'review': reviews,
    'reviewer name': reviewer_names,
    'address': addresses,
    'time': times
})

print(df)


ValueError: All arrays must be of the same length

In [9]:
# Initialize empty lists to store data
ratings = []
tag_lines = []
reviews = []
reviewer_names = []
addresses = []
times = []

# Loop through list elements
for item in abc:
    if "Page" in item:  # Skip irrelevant line
        continue
    else:
        try:
            # Split the item by newline character and extract relevant information
            parts = item.split("\n")
            ratings.append(parts[0])  # Rating
            tag_lines.append(parts[1])  # Tag line
            reviews.append('\n'.join(parts[2:-2]))  # Review
            reviewer_names.append(parts[-2])  # Reviewer name
            address_time = parts[-1].split(", ")  # Address and time
            addresses.append(address_time[1])  # Address
            times.append(address_time[0])  # Time
        except IndexError:
            print("IndexError occurred while processing an item:", item)

# Create DataFrame
df = pd.DataFrame({
    'rating': ratings,
    'tag line': tag_lines,
    'review': reviews,
    'reviewer name': reviewer_names,
    'address': addresses,
    'time': times
})
df

IndexError occurred while processing an item: 5
Perfect product!
all about good thank you
rohit sharma
Certified Buyer, Yamunanagar
4 days ago
10
IndexError occurred while processing an item: 5
Great product
Awesome just Wow
Madhu Khan
Certified Buyer, Murshidabad District
5 days ago
00
IndexError occurred while processing an item: 5
Fabulous!
Really appreciate of the camera
Kaisor Khan
Certified Buyer, Bengaluru
5 days ago
00
IndexError occurred while processing an item: 5
Mind-blowing purchase
Great camera. High end performance. Amazing design.
Loving it.
Sumana Pati
Certified Buyer, Puruliya District
6 days ago
00
IndexError occurred while processing an item: 4
Worth the money
Very nice product
Manju saji
Certified Buyer, Thiruvananthapuram District
7 days ago
00
IndexError occurred while processing an item: 5
Classy product
Awesome 😎😎 Thanks Flipkart
Flipkart Customer
Certified Buyer, Faridabad
7 days ago
00
IndexError occurred while processing an item: 5
Wonderful
excellent produc

ValueError: All arrays must be of the same length