In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Web driver
# Module-level Chrome WebDriver instance. scrape_amazon_reviews() below reads
# this global for page loads and element lookups.
driver = webdriver.Chrome()

def scrape_amazon_reviews(url, max_page=5):
    """Scrape Amazon product reviews with the module-level Selenium ``driver``.

    Each page is loaded directly by URL, so no "next" click is needed: the
    page number is appended to ``url`` and also sent as ``pageNumber``.

    Args:
        url: Review-listing URL prefix; the page number is appended to it.
        max_page: Highest page number to visit (pages are 1-indexed).

    Returns:
        list[dict]: One dict per review with the keys ``reviewer_name``,
        ``review_date``, ``rating`` and ``review_text``. A field whose element
        is absent from a review is stored as ``None``.
    """
    def _first_text(parent, xpath, attribute=None):
        # find_elements returns [] instead of raising, so one review with a
        # missing field no longer aborts the whole scrape.
        matches = parent.find_elements(By.XPATH, xpath)
        if not matches:
            return None
        element = matches[0]
        return element.get_attribute(attribute) if attribute else element.text

    reviews = []
    for page in range(1, max_page + 1):
        # print("Page:", page)
        page_url = f'{url}{page}?ie=UTF8&reviewerType=all_reviews&pageNumber={page}'
        driver.get(page_url)

        review_elements = driver.find_elements(By.XPATH, "//div[@data-hook='review']")
        if not review_elements:
            print(f'NO Reviews Found on Page {page}')
            break

        for review in review_elements:
            reviews.append({
                'reviewer_name': _first_text(review, ".//span[@class='a-profile-name']"),
                'review_date': _first_text(review, ".//span[@data-hook='review-date']"),
                # The rating is read through innerText rather than .text.
                'rating': _first_text(review, ".//i[contains(@class, 'review-rating')]/span", 'innerText'),
                'review_text': _first_text(review, ".//span[@data-hook='review-body']"),
            })

        # The next iteration navigates by URL, so the "next" link is only used
        # as an end-of-listing signal. Clicking it loaded every page twice and
        # the follow-up wait could raise an uncaught TimeoutException.
        if not driver.find_elements(By.XPATH, "//li[@class='a-last']//a"):
            print("Next Page not found")
            break

    return reviews

def scrape_flipkart_reviews(url, max_page=5):
    """Scrape Flipkart product reviews with ``requests`` and BeautifulSoup.

    Args:
        url: Review-listing URL ending in ``page=``; the page number is
            appended to it.
        max_page: Highest page number to request (pages are 1-indexed).

    Returns:
        list[dict]: One dict per review with the keys ``reviewer_name``,
        ``review_title``, ``rating`` and ``review_text``. Scraping stops early
        (keeping what was collected) on a request error, a non-200 response,
        or a page that yields no reviews.
    """
    reviews = []
    headers = {
        'User-Agent': 'Use your own user agent',
        'Accept-Language': 'en-us,en;q=0.5'
    }

    for i in range(1, max_page + 1):
        page_url = f'{url}{i}'
        try:
            # Without a timeout a stalled connection would block forever.
            page = requests.get(page_url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f'Request failed on Page {i}: {exc}')
            break
        if page.status_code != 200:
            print(f'Page {i} returned HTTP status {page.status_code}')
            break

        soup = BeautifulSoup(page.content, 'html.parser')

        names = soup.find_all('p', class_='_2NsDsF AwS1CA')
        titles = soup.find_all('p', class_='z9E0IG')
        ratings = soup.find_all('div', class_=['XQDdHH Ga3i8K', 'XQDdHH Czs3gR Ga3i8K' , 'XQDdHH Js30Fc Ga3i8K'])
        comments = soup.find_all('div', class_='ZmyHeo')

        # NOTE(review): the four lists are collected independently and paired
        # by position; a review lacking one field would shift the pairing.
        page_reviews = []
        for name, title, rating, comment in zip(names, titles, ratings, comments):
            # Fall back to the comment container itself when the expected
            # nested <div><div> wrapper is absent, instead of raising
            # AttributeError on None.
            outer = comment.div
            inner = outer.div if outer is not None else None
            text_node = inner if inner is not None else comment
            page_reviews.append({
                'reviewer_name': name.get_text(),
                'review_title': title.get_text(),
                'rating': rating.get_text() if rating else '0',
                'review_text': text_node.get_text(strip=True),
            })

        if not page_reviews:
            # Matches the Amazon scraper: stop at the first page without reviews
            # rather than requesting every remaining page.
            print(f'NO Reviews Found on Page {i}')
            break
        reviews.extend(page_reviews)

    return reviews

# Review-listing URLs; each scraper appends the page number to the end of its URL.
amazon_url = "https://www.amazon.in/Apple-iPhone-13-128GB-Blue/product-reviews/B09G9BL5CP/ref=cm_cr_arp_d_paging_btm_next_"
flipkart_url = "https://www.flipkart.com/apple-iphone-15-plus-black-256-gb/product-reviews/itm4b0608e773fc5?pid=MOBGTAGPWKT2VSBB&lid=LSTMOBGTAGPWKT2VSBBYV0FGC&marketplace=FLIPKART&page="

try:
    amazon_reviews = scrape_amazon_reviews(amazon_url)
    flipkart_reviews = scrape_flipkart_reviews(flipkart_url)
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes the
    # current window. Running it in `finally` releases the browser even when a
    # scraper raises.
    driver.quit()

df_amazon = pd.DataFrame(amazon_reviews)
df_flipkart = pd.DataFrame(flipkart_reviews)

In [4]:
# Preview the first rows of the scraped Amazon reviews.
df_amazon.head()

Unnamed: 0,reviewer_name,review_date,rating,review_text
0,Pankaj Kumar,Reviewed in India on 24 February 2024,5.0 out of 5 stars,The iPhone 13 128GB has surpassed my expectati...
1,vaibhav,Reviewed in India on 13 November 2023,5.0 out of 5 stars,I snagged the iPhone 13 during the Great India...
2,ApTreX,Reviewed in India on 6 February 2022,5.0 out of 5 stars,My honest review after going broke buying this...
3,Faiyaz,Reviewed in India on 5 January 2024,4.0 out of 5 stars,Design:\nThe iPhone 13 retains the iconic desi...
4,Avnish Shukla,Reviewed in India on 26 April 2024,5.0 out of 5 stars,"Good camera, nice performance, excellent displ..."


In [5]:
# Preview the first rows of the scraped Flipkart reviews.
df_flipkart.head()

Unnamed: 0,reviewer_name,review_title,rating,review_text
0,Sagar Behera,Best in the market!,5,Go fr it
1,Anshul Duhan,Must buy!,5,Best in class
2,Gundabattina SaradhiMuneendra,Mind-blowing purchase,5,Fabulous 😍Loved itCamera awesome 😘Performance ...
3,Ashutosh Singh,Must buy!,5,Blue colour is very lightBut performance is ve...
4,Anirudhya Ghosh,Worth every penny,5,Premium Colour


In [6]:
%%writefile test.py
import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import requests
from bs4 import BeautifulSoup
import pandas as pd



# Web driver
# Module-level Chrome WebDriver instance. scrape_amazon_reviews() below reads
# this global for page loads and element lookups.
driver = webdriver.Chrome()

def scrape_amazon_reviews(url, max_page=5):
    """Scrape Amazon product reviews with the module-level Selenium ``driver``.

    Each page is loaded directly by URL, so no "next" click is needed: the
    page number is appended to ``url`` and also sent as ``pageNumber``.

    Args:
        url: Review-listing URL prefix; the page number is appended to it.
        max_page: Highest page number to visit (pages are 1-indexed).

    Returns:
        list[dict]: One dict per review with the keys ``reviewer_name``,
        ``review_date``, ``rating`` and ``review_text``. A field whose element
        is absent from a review is stored as ``None``.
    """
    def _first_text(parent, xpath, attribute=None):
        # find_elements returns [] instead of raising, so one review with a
        # missing field no longer aborts the whole scrape.
        matches = parent.find_elements(By.XPATH, xpath)
        if not matches:
            return None
        element = matches[0]
        return element.get_attribute(attribute) if attribute else element.text

    reviews = []
    for page in range(1, max_page + 1):
        print("Page:", page)
        page_url = f'{url}{page}?ie=UTF8&reviewerType=all_reviews&pageNumber={page}'
        driver.get(page_url)

        review_elements = driver.find_elements(By.XPATH, "//div[@data-hook='review']")
        if not review_elements:
            print(f'NO Reviews Found on Page {page}')
            break

        for review in review_elements:
            reviews.append({
                'reviewer_name': _first_text(review, ".//span[@class='a-profile-name']"),
                'review_date': _first_text(review, ".//span[@data-hook='review-date']"),
                # The rating is read through innerText rather than .text.
                'rating': _first_text(review, ".//i[contains(@class, 'review-rating')]/span", 'innerText'),
                'review_text': _first_text(review, ".//span[@data-hook='review-body']"),
            })

        # The next iteration navigates by URL, so the "next" link is only used
        # as an end-of-listing signal. Clicking it loaded every page twice and
        # the follow-up wait could raise an uncaught TimeoutException.
        if not driver.find_elements(By.XPATH, "//li[@class='a-last']//a"):
            print("Next Page not found")
            break

    return reviews

def scrape_flipkart_reviews(url, max_page=5):
    """Scrape Flipkart product reviews with ``requests`` and BeautifulSoup.

    Args:
        url: Review-listing URL ending in ``page=``; the page number is
            appended to it.
        max_page: Highest page number to request (pages are 1-indexed).

    Returns:
        list[dict]: One dict per review with the keys ``reviewer_name``,
        ``review_title``, ``rating`` and ``review_text``. Scraping stops early
        (keeping what was collected) on a request error, a non-200 response,
        or a page that yields no reviews.
    """
    reviews = []
    headers = {
        'User-Agent': 'Use your own user agent',
        'Accept-Language': 'en-us,en;q=0.5'
    }

    for i in range(1, max_page + 1):
        page_url = f'{url}{i}'
        try:
            # Without a timeout a stalled connection would block forever.
            page = requests.get(page_url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f'Request failed on Page {i}: {exc}')
            break
        if page.status_code != 200:
            print(f'Page {i} returned HTTP status {page.status_code}')
            break

        soup = BeautifulSoup(page.content, 'html.parser')

        names = soup.find_all('p', class_='_2NsDsF AwS1CA')
        titles = soup.find_all('p', class_='z9E0IG')
        ratings = soup.find_all('div', class_=['XQDdHH Ga3i8K', 'XQDdHH Czs3gR Ga3i8K' , 'XQDdHH Js30Fc Ga3i8K'])
        comments = soup.find_all('div', class_='ZmyHeo')

        # NOTE(review): the four lists are collected independently and paired
        # by position; a review lacking one field would shift the pairing.
        page_reviews = []
        for name, title, rating, comment in zip(names, titles, ratings, comments):
            # Fall back to the comment container itself when the expected
            # nested <div><div> wrapper is absent, instead of raising
            # AttributeError on None.
            outer = comment.div
            inner = outer.div if outer is not None else None
            text_node = inner if inner is not None else comment
            page_reviews.append({
                'reviewer_name': name.get_text(),
                'review_title': title.get_text(),
                'rating': rating.get_text() if rating else '0',
                'review_text': text_node.get_text(strip=True),
            })

        if not page_reviews:
            # Matches the Amazon scraper: stop at the first page without reviews
            # rather than requesting every remaining page.
            print(f'NO Reviews Found on Page {i}')
            break
        reviews.extend(page_reviews)

    return reviews

# Streamlit code
# Sidebar: pick the site, enter the review-listing URL, press "Scrape".
# The scraped reviews are rendered as a DataFrame in the main pane.
st.title('Web Scraping App')

option = st.sidebar.selectbox(
    'Which website do you want to scrape?',
    ('Amazon', 'Flipkart')
)

url = st.sidebar.text_input('Enter the URL of the product')

try:
    if st.sidebar.button('Scrape'):
        product_url = url.strip()
        if not product_url:
            # An empty URL would otherwise be handed straight to the scrapers.
            st.warning('Please enter the URL of the product before scraping.')
        else:
            if option == 'Amazon':
                reviews = scrape_amazon_reviews(product_url)
            else:
                reviews = scrape_flipkart_reviews(product_url)

            df = pd.DataFrame(reviews)
            st.write(df)
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes the
    # current window. `finally` releases the browser even when scraping raises.
    driver.quit()

Overwriting test.py


# Streamlit app with sentiment analysis

In [7]:
%%writefile test_senti.py
import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
import google.ai.generativelanguage as glm
import pandas as pd

import os

# The API key must never be committed with the source. Read it from the
# environment instead. The keys that were previously hardcoded on this line
# (one live, one in a trailing comment) should be treated as leaked and revoked.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to your Gemini API key "
        "before starting the app."
    )
genai.configure(api_key=_gemini_api_key)


# Web driver
# Module-level Chrome WebDriver instance. scrape_amazon_reviews() below reads
# this global for page loads and element lookups.
driver = webdriver.Chrome()

def scrape_amazon_reviews(url, max_page=5):
    """Scrape Amazon product reviews with the module-level Selenium ``driver``.

    Each page is loaded directly by URL, so no "next" click is needed: the
    page number is appended to ``url`` and also sent as ``pageNumber``.

    Args:
        url: Review-listing URL prefix; the page number is appended to it.
        max_page: Highest page number to visit (pages are 1-indexed).

    Returns:
        list[dict]: One dict per review with the keys ``reviewer_name``,
        ``review_date``, ``rating`` and ``review_text``. A field whose element
        is absent from a review is stored as ``None``.
    """
    def _first_text(parent, xpath, attribute=None):
        # find_elements returns [] instead of raising, so one review with a
        # missing field no longer aborts the whole scrape.
        matches = parent.find_elements(By.XPATH, xpath)
        if not matches:
            return None
        element = matches[0]
        return element.get_attribute(attribute) if attribute else element.text

    reviews = []
    for page in range(1, max_page + 1):
        print("Page:", page)
        page_url = f'{url}{page}?ie=UTF8&reviewerType=all_reviews&pageNumber={page}'
        driver.get(page_url)

        review_elements = driver.find_elements(By.XPATH, "//div[@data-hook='review']")
        if not review_elements:
            print(f'NO Reviews Found on Page {page}')
            break

        for review in review_elements:
            reviews.append({
                'reviewer_name': _first_text(review, ".//span[@class='a-profile-name']"),
                'review_date': _first_text(review, ".//span[@data-hook='review-date']"),
                # The rating is read through innerText rather than .text.
                'rating': _first_text(review, ".//i[contains(@class, 'review-rating')]/span", 'innerText'),
                'review_text': _first_text(review, ".//span[@data-hook='review-body']"),
            })

        # The next iteration navigates by URL, so the "next" link is only used
        # as an end-of-listing signal. Clicking it loaded every page twice and
        # the follow-up wait could raise an uncaught TimeoutException.
        if not driver.find_elements(By.XPATH, "//li[@class='a-last']//a"):
            print("Next Page not found")
            break

    return reviews

def scrape_flipkart_reviews(url, max_page=5):
    """Scrape Flipkart product reviews with ``requests`` and BeautifulSoup.

    Args:
        url: Review-listing URL ending in ``page=``; the page number is
            appended to it.
        max_page: Highest page number to request (pages are 1-indexed).

    Returns:
        list[dict]: One dict per review with the keys ``reviewer_name``,
        ``review_title``, ``rating`` and ``review_text``. Scraping stops early
        (keeping what was collected) on a request error, a non-200 response,
        or a page that yields no reviews.
    """
    reviews = []
    headers = {
        'User-Agent': 'Use your own user agent',
        'Accept-Language': 'en-us,en;q=0.5'
    }

    for i in range(1, max_page + 1):
        page_url = f'{url}{i}'
        try:
            # Without a timeout a stalled connection would block forever.
            page = requests.get(page_url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f'Request failed on Page {i}: {exc}')
            break
        if page.status_code != 200:
            print(f'Page {i} returned HTTP status {page.status_code}')
            break

        soup = BeautifulSoup(page.content, 'html.parser')

        names = soup.find_all('p', class_='_2NsDsF AwS1CA')
        titles = soup.find_all('p', class_='z9E0IG')
        ratings = soup.find_all('div', class_=['XQDdHH Ga3i8K', 'XQDdHH Czs3gR Ga3i8K' , 'XQDdHH Js30Fc Ga3i8K'])
        comments = soup.find_all('div', class_='ZmyHeo')

        # NOTE(review): the four lists are collected independently and paired
        # by position; a review lacking one field would shift the pairing.
        page_reviews = []
        for name, title, rating, comment in zip(names, titles, ratings, comments):
            # Fall back to the comment container itself when the expected
            # nested <div><div> wrapper is absent, instead of raising
            # AttributeError on None.
            outer = comment.div
            inner = outer.div if outer is not None else None
            text_node = inner if inner is not None else comment
            page_reviews.append({
                'reviewer_name': name.get_text(),
                'review_title': title.get_text(),
                'rating': rating.get_text() if rating else '0',
                'review_text': text_node.get_text(strip=True),
            })

        if not page_reviews:
            # Matches the Amazon scraper: stop at the first page without reviews
            # rather than requesting every remaining page.
            print(f'NO Reviews Found on Page {i}')
            break
        reviews.extend(page_reviews)

    return reviews

# Streamlit code
# Sidebar: pick the site, enter the review-listing URL, press "Scrape".
# Each scraped review is labelled by the Gemini model and the resulting
# DataFrame (with a 'Sentiment' column) is rendered in the main pane.
st.title('Web Scraping & Sentimental Analysis App')

option = st.sidebar.selectbox(
    'Which website do you want to scrape?',
    ('Amazon', 'Flipkart')
)

url = st.sidebar.text_input('Enter the URL of the product')

try:
    if st.sidebar.button('Scrape'):
        product_url = url.strip()
        if not product_url:
            # An empty URL would otherwise be handed straight to the scrapers.
            st.warning('Please enter the URL of the product before scraping.')
        else:
            if option == 'Amazon':
                reviews = scrape_amazon_reviews(product_url)
            else:
                reviews = scrape_flipkart_reviews(product_url)

            df = pd.DataFrame(reviews)

            if df.empty:
                # An empty frame has no 'review_text' column; indexing it
                # would raise KeyError.
                st.warning('No reviews were scraped, so there is nothing to analyse.')
            else:
                # Model and prompt are loop-invariant: build them once.
                model = genai.GenerativeModel('gemini-pro')
                prompt = """ Give the Sentiment analysis of given review only in two words either ***POSITIVE** 😀😀  or **NEGATIVE** ☹️☹️ """

                sentiment = []
                for review_text in df['review_text']:
                    if not review_text:
                        # Nothing to classify for a missing or empty review body.
                        sentiment.append('UNKNOWN')
                        continue
                    response = model.generate_content([prompt, review_text])
                    try:
                        sentiment.append(response.text)
                    except ValueError:
                        # response.text raises ValueError when the response
                        # carries no text part (e.g. the reply was blocked).
                        sentiment.append('UNKNOWN')
                df['Sentiment'] = sentiment

                st.write(df)
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes the
    # current window. `finally` releases the browser even when scraping or
    # the model call raises.
    driver.quit()

Overwriting test_senti.py


In [1]:
# NOTE(review): appears to be a leftover scratch cell (execution count 1 sits
# after cell 7, and nothing visible depends on it) — consider deleting.
print("hello")

hello
