In [30]:
import requests
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import pandas as pd
import re
import emoji
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer

nltk.download('punkt')
nltk.download('stopwords')

def preprocess_text(text):
    text = emoji.demojize(text, delimiters=(" ", " "))
    text = re.sub(r'@\w+', 'socialmention', text)
    text = re.sub(r'#\w+', 'hashtag', text)
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    words = word_tokenize(text.lower())
    words = [word for word in words if word.isalpha()]
    return ' '.join(words)

stop_words = set(stopwords.words('english'))

options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)

url = "https://www.amazon.com/Perfect-Son-gripping-psychological-breathtaking/dp/B086PMZR78/"
driver.get(url)
time.sleep(5)
html = driver.page_source
driver.quit()
soup = BeautifulSoup(html, 'html.parser')
reviews = soup.find_all('span', {'data-hook': 'review-body'})
review_texts = [review.get_text(strip=True) for review in reviews]

vectorizer = TfidfVectorizer()
classifier = LinearSVC()
preprocess = FunctionTransformer(lambda x: [preprocess_text(text) for text in x], validate=False)

training_data = pd.read_csv('Sentimental Analysis Data.csv')
training_data['text'] = training_data['text'].fillna('')

model_pipeline = make_pipeline(preprocess, vectorizer, classifier)
model_pipeline.fit(training_data['text'], training_data['sentiment'])

sentiments = []
ratings = []
for review in review_texts:
    predicted_label = model_pipeline.predict([review])[0]
    sentiments.append(predicted_label)
    rating = 5 if predicted_label == 'positive' else 1
    ratings.append(rating)
    print(f"Review: {review}\nPredicted Sentiment: {predicted_label}, Rating: {rating}")

average_rating = sum(ratings) / len(ratings) if ratings else 0
print(f"Overall product rating based on user reviews: {average_rating:.2f}/5")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\anmol\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\anmol\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Review: This truly was a book I didn’t want to put down and thought I had it all figured out but I was so wrong. It really had a twist towards the end. The main character Erika Cass had a pretty good life except her son was different from others. She’s known this all along and has tried to keep him and others safe from him. When a girl her son likes goes missing, she right away assumes he’s the one who did it. It also appears the police and community feel this same. The book is written in each chapter either by a character or an interview about the event of the missing girl. I’m not saying anymore because it really has a great twist. A super good read.Read more
Predicted Sentiment: positive, Rating: 5
Review: Good read, has a great twist. I didn’t see coming. This was a proverbial hard to put down. good characters good developments, hard to guess what was happening?Read more
Predicted Sentiment: positive, Rating: 5
Review: I have read numerous novels by this author lately because she k