# Phase 1: Web Scraping

In this phase, we use Selenium to collect customer reviews including reviewer name, rating, title, text, and date.

In [5]:
# === Importing Required Libraries ===

# Standard libraries
import re
import random
import time
from datetime import datetime

# Data handling
import pandas as pd
import numpy as np

# Selenium for web scraping
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options

In [8]:
def load_browser():
    try:
        options = Options()
        options.page_load_strategy = 'eager'
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')

        #Browser location
        options.binary_location = r"C:\Users\Anon\AppData\Local\BraveSoftware\Brave-Browser\Application\brave.exe"

        browser = webdriver.Chrome(options=options)
        print("Brave Driver Loaded Successfully")
        return browser

    except Exception as e:
        print("An unexpected Error Occurred:", e)


In [17]:
# Function to Scrap the data from our website

def scrap_data(url,browser):
    
    try:
        browser.get(url)
        print("page Loaded Successfully")
    except TimeoutException:
        print("Page load timed out, continuing anyway...")
        
        
    review_titles_all=[]
    review_texts_all=[]
    reviewer_names_all=[]
    ratings_all=[]
    review_dates_all=[]


    while True:
        try:
            review_items = WebDriverWait(browser, 10).until(
            EC.presence_of_all_elements_located(
            (By.CLASS_NAME,"review_1H0vP")
            )
            )

            try:
                
                for item in review_items:
                    
                    reviewer_name = item.find_element(By.XPATH, ".//span[@class='author_3-9SJ']//span[last()]")
                    rating=item.find_element(By.CLASS_NAME,"sr-only")
                    review_title=item.find_element(By.CLASS_NAME,"reviewTitle_27zYc")
                    review_date=item.find_element(By.CLASS_NAME,"locationAndTime_FDdpK")
                    review_text=item.find_element(By.CLASS_NAME, "reviewContent_wpBgx")
                    print("items extracted")
                    
                    reviewer_names_all.append(reviewer_name.text)
                    review_titles_all.append(review_title.text)
                    review_dates_all.append(review_date.text)
                    review_texts_all.append(review_text.text)
                    ratings_all.append(rating.text)
                    print("Items pushed")
                    print(review_title.text)
                    
            except:
                print("No more items left to review")
                
            
            try:
                button = WebDriverWait(browser, 5).until(
                    EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Show More')]"))
                )
                browser.execute_script("arguments[0].click();", button)  # Use JS click to avoid interception
                print("Clicked 'Show More' button")
                time.sleep(3)  # Wait for content to load
            except:
                print("No more 'Show More' button found or clickable. Scraping complete.")
                break   
                
        except Exception as e:
            print(f"Error during scraping: {e}")
            break
    
    return review_titles_all,review_texts_all,reviewer_names_all,review_dates_all,ratings_all
           


In [18]:
# Function to unload Drivers
def unload_browsers(browser,length):
    browser.quit()
    print(f"Scraping completed. Total reviews collected: {len(length)}")

In [19]:
url="https://www.bestbuy.ca/en-ca/product/asus-rog-strix-scar-18-gaming-laptop-intel-core-ultra-9-275hx-64gb-ram-1tb-ssd-rtx-5080-exclusive-retail-partner/19186641/review"
try:
    browser=load_browser()
    review_titles,review_texts,reviewer_names,dates,ratings=scrap_data(url,browser)
    unload_browsers(browser,reviewer_names)
except:
    print("An unexpected Error occured during execution")

Brave Driver Loaded Successfully
page Loaded Successfully
items extracted
Items pushed
This laptop is a beast and keeps it cool
items extracted
Items pushed
Big, Beastly, Powerhouse
items extracted
Items pushed
Perfect replacement for gaming desktop
items extracted
Items pushed
The Latest & Greatest packed neatly in an 18" box
items extracted
Items pushed
Closest thing to a full-sized rig that I've owned!
items extracted
Items pushed
Powerful, large and in charge
items extracted
Items pushed
Portable Upgradable Power
items extracted
Items pushed
Monster specs and monster game play
items extracted
Items pushed
An excellent desktop-replacement gaming laptop
items extracted
Items pushed
Fantastic Performance, Excellent Thermals!
Clicked 'Show More' button
items extracted
Items pushed
This laptop is a beast and keeps it cool
items extracted
Items pushed
Big, Beastly, Powerhouse
items extracted
Items pushed
Perfect replacement for gaming desktop
items extracted
Items pushed
The Latest & Gre

# 2. Preparing the Data for CSV file

In [20]:
print(len(reviewer_names))
print(len(review_texts))
print(len(review_titles))
print(len(dates))
print(len(ratings))

1207
1207
1207
1207
1207


In [21]:
#Extracting rating from the rating list
index=random.randint(0,890)
final_ratings=[int(text.split(":")[1][1]) for text in ratings]
print(f"for index: {index}")
print(ratings[index])
print(final_ratings[index])


for index: 99
Customer rating: 5 out of 5 stars
5


In [22]:
# Removing some unecessary line (it occured in most of the reviews)
unecessary_line="[This review was collected as part of a promotion.] "
filtered_review_texts=[text.replace(unecessary_line,"") for text in review_texts]
filtered_review_texts[0]

'This laptop is so much bigger than my ROG STRIX G16, I was worried about the screen being ips with no hdr, but it looks super good especially at 4k with no light bleed. Running 3d mark scores (ill post the photos) it was well above avg scores and didnt even get over 80°C in fact its avg temp for the benchmark was only 65°C of you wanna game at 2k everything maxed out at over 100fps this is the laptop for you, if you wanna game at 4k mid to high settings this is the laptop for you.'

In [23]:
# Extracting the dates from the date string (from October 10,2024 to 2024-10-10)
def extract_date(text):
# Extract the date part 
    match = re.search(r"[A-Za-z]+\s\d{1,2},\s\d{4}", text)
    if match:
        date_str = match.group()
        
        # Parse the date string into a datetime object
        date_obj = datetime.strptime(date_str, "%B %d, %Y")
        
        # Convert to DD-MM-YY format
        formatted_date = date_obj.strftime("%Y-%m-%d")
        
        return formatted_date  # Output: 2024-10-10


In [24]:
#Calling extract_date function to extract dates
extracted_dates=[extract_date(date) for date in dates ]

print(f"For index: {index}")
extracted_dates[index]


For index: 99


'2025-05-31'

In [27]:
df=pd.DataFrame({
    "Reviewer's Name":reviewer_names,
    "Title":review_titles,
    "Review Text": filtered_review_texts,
    "Rating (Out of 5)": final_ratings,
    "Date Posted (YYYY-MM-DD)":extracted_dates
})

print(f"Before removing duplicates Dataframe Length: {len(df)}")
df=df.drop_duplicates() #removing Duplicates
df.index = range(1, len(df) + 1) # Adding index
print(f"After removing duplicates Dataframe Length: {len(df)}")

df.head()

Before removing duplicates Dataframe Length: 1207
After removing duplicates Dataframe Length: 146


Unnamed: 0,Reviewer's Name,Title,Review Text,Rating (Out of 5),Date Posted (YYYY-MM-DD)
1,Brandon,This laptop is a beast and keeps it cool,This laptop is so much bigger than my ROG STRI...,5,2025-05-10
2,JorgeST,"Big, Beastly, Powerhouse","Gaming has come a long way. In fact, this lapt...",5,2025-05-13
3,JustBeingHonest,Perfect replacement for gaming desktop,The ASUS ROG Strix is a very functional gaming...,5,2025-05-09
4,ferrari,"The Latest & Greatest packed neatly in an 18"" box",Over time I've had a number of gaming laptops;...,5,2025-05-09
5,Wraith,Closest thing to a full-sized rig that I've ow...,Ever since I have stopped buying/building full...,5,2025-05-10


In [28]:
# Saving Dataframe into reviews.csv
df.to_csv("data/reviews.csv",index=False)

# 3. Machine Learning Phase

Now that we have scraped data from our website and saved it in our csv file called `reviews.csv`, next phase will be Machine Learning Phase. 

In [1]:
#importing all necessary libraires
import os
import spacy
from dotenv import load_dotenv
from huggingface_hub import login
from transformers.pipelines import pipeline
import torch
import tensorflow as tf

In [2]:
#Loading HF Token
load_dotenv()
hf_token=os.getenv("HF_TOKEN")

In [3]:
login(hf_token)


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


## 1. Data Loading Phase

In [4]:
import pandas as pd
df=pd.read_csv("data/reviews.csv")
df.head()

Unnamed: 0,Reviewer's Name,Title,Review Text,Rating (Out of 5),Date Posted (YYYY-MM-DD)
0,Brandon,This laptop is a beast and keeps it cool,This laptop is so much bigger than my ROG STRI...,5,2025-05-10
1,JorgeST,"Big, Beastly, Powerhouse","Gaming has come a long way. In fact, this lapt...",5,2025-05-13
2,JustBeingHonest,Perfect replacement for gaming desktop,The ASUS ROG Strix is a very functional gaming...,5,2025-05-09
3,ferrari,"The Latest & Greatest packed neatly in an 18"" box",Over time I've had a number of gaming laptops;...,5,2025-05-09
4,Wraith,Closest thing to a full-sized rig that I've ow...,Ever since I have stopped buying/building full...,5,2025-05-10


## 2. Data preprocessing phase

In [5]:

df["text"]=df["Title"]+df["Review Text"] #Combining two columns together
df.drop(["Title","Review Text"],axis=1,inplace=True)
df.dropna(subset=["text"],inplace=True) # Dropping rows with NaN values
df.head()

Unnamed: 0,Reviewer's Name,Rating (Out of 5),Date Posted (YYYY-MM-DD),text
0,Brandon,5,2025-05-10,This laptop is a beast and keeps it coolThis l...
1,JorgeST,5,2025-05-13,"Big, Beastly, PowerhouseGaming has come a long..."
2,JustBeingHonest,5,2025-05-09,Perfect replacement for gaming desktopThe ASUS...
3,ferrari,5,2025-05-09,"The Latest & Greatest packed neatly in an 18"" ..."
4,Wraith,5,2025-05-10,Closest thing to a full-sized rig that I've ow...


# 3. Text Summarization Phase
   The reason behind summarization is to reduce the length of the text while retaining the important information. This will help in better sentiment analysis and understanding of the text.

In [12]:
import torch
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6") # Loading an Transformer model for text summarization

Device set to use cpu


In [14]:
def chunk_text(text, chunk_size=1000): 
    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)] # since texts are so big, and some of the texts exceeds the token size of models, so we used chunks to pass the data in appropriate values

In [15]:
summarized_texts=[]
i=1

for text in df["text"].to_list():

  chunks = chunk_text(text) # we got list of chunks
  summaries = []
  for c in chunks:
      summaries.append(summarizer(c, max_length=150, min_length=50, do_sample=False)[0]['summary_text']) # chunks passed to model

  final_summary = " ".join(summaries) # list of summarized text combines back to a complete string
  summarized_texts.append(final_summary)
  print(f"{i} Summarized Text Added")
  i=i+1

Your max_length is set to 150, but your input_length is only 133. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=66)


1 Summarized Text Added
2 Summarized Text Added


Your max_length is set to 150, but your input_length is only 72. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)


3 Summarized Text Added


Your max_length is set to 150, but your input_length is only 66. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)


4 Summarized Text Added


Your max_length is set to 150, but your input_length is only 129. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)


5 Summarized Text Added
6 Summarized Text Added


Your max_length is set to 150, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


7 Summarized Text Added


Your max_length is set to 150, but your input_length is only 76. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=38)


8 Summarized Text Added
9 Summarized Text Added


Your max_length is set to 150, but your input_length is only 66. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)


10 Summarized Text Added
11 Summarized Text Added


Your max_length is set to 150, but your input_length is only 123. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)


12 Summarized Text Added


Your max_length is set to 150, but your input_length is only 54. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)


13 Summarized Text Added
14 Summarized Text Added


Your max_length is set to 150, but your input_length is only 84. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)


15 Summarized Text Added


Your max_length is set to 150, but your input_length is only 136. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)


16 Summarized Text Added


Your max_length is set to 150, but your input_length is only 42. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=21)
Your max_length is set to 150, but your input_length is only 126. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)


17 Summarized Text Added


Your max_length is set to 150, but your input_length is only 83. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=41)


18 Summarized Text Added


Your max_length is set to 150, but your input_length is only 37. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)


19 Summarized Text Added
20 Summarized Text Added


Your max_length is set to 150, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Your max_length is set to 150, but your input_length is only 142. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=71)


21 Summarized Text Added


Your max_length is set to 150, but your input_length is only 139. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=69)


22 Summarized Text Added
23 Summarized Text Added


Your max_length is set to 150, but your input_length is only 109. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
Your max_length is set to 150, but your input_length is only 139. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=69)


24 Summarized Text Added


Your max_length is set to 150, but your input_length is only 134. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=67)


25 Summarized Text Added
26 Summarized Text Added
27 Summarized Text Added


Your max_length is set to 150, but your input_length is only 101. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)


28 Summarized Text Added


Your max_length is set to 150, but your input_length is only 139. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=69)


29 Summarized Text Added
30 Summarized Text Added


Your max_length is set to 150, but your input_length is only 84. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)


31 Summarized Text Added


Your max_length is set to 150, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)


32 Summarized Text Added


Your max_length is set to 150, but your input_length is only 62. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=31)


33 Summarized Text Added


Your max_length is set to 150, but your input_length is only 71. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


34 Summarized Text Added


Your max_length is set to 150, but your input_length is only 35. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)


35 Summarized Text Added


Your max_length is set to 150, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


36 Summarized Text Added


Your max_length is set to 150, but your input_length is only 130. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=65)


37 Summarized Text Added


Your max_length is set to 150, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


38 Summarized Text Added


Your max_length is set to 150, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)


39 Summarized Text Added


Your max_length is set to 150, but your input_length is only 117. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=58)


40 Summarized Text Added
41 Summarized Text Added


Your max_length is set to 150, but your input_length is only 16. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)
Your max_length is set to 150, but your input_length is only 136. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)


42 Summarized Text Added


Your max_length is set to 150, but your input_length is only 131. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=65)


43 Summarized Text Added


Your max_length is set to 150, but your input_length is only 120. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=60)


44 Summarized Text Added
45 Summarized Text Added


Your max_length is set to 150, but your input_length is only 106. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)


46 Summarized Text Added


Your max_length is set to 150, but your input_length is only 100. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)


47 Summarized Text Added
48 Summarized Text Added
49 Summarized Text Added


Your max_length is set to 150, but your input_length is only 87. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)


50 Summarized Text Added


Your max_length is set to 150, but your input_length is only 95. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=47)


51 Summarized Text Added
52 Summarized Text Added


Your max_length is set to 150, but your input_length is only 89. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)


53 Summarized Text Added


Your max_length is set to 150, but your input_length is only 83. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=41)


54 Summarized Text Added


Your max_length is set to 150, but your input_length is only 77. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=38)


55 Summarized Text Added


Your max_length is set to 150, but your input_length is only 64. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=32)


56 Summarized Text Added


Your max_length is set to 150, but your input_length is only 55. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)


57 Summarized Text Added


Your max_length is set to 150, but your input_length is only 71. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


58 Summarized Text Added


Your max_length is set to 150, but your input_length is only 56. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)


59 Summarized Text Added


Your max_length is set to 150, but your input_length is only 70. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


60 Summarized Text Added


Your max_length is set to 150, but your input_length is only 63. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=31)


61 Summarized Text Added


Your max_length is set to 150, but your input_length is only 57. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)


62 Summarized Text Added


Your max_length is set to 150, but your input_length is only 54. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)


63 Summarized Text Added


Your max_length is set to 150, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


64 Summarized Text Added


Your max_length is set to 150, but your input_length is only 42. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=21)


65 Summarized Text Added


Your max_length is set to 150, but your input_length is only 57. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)


66 Summarized Text Added


Your max_length is set to 150, but your input_length is only 56. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)


67 Summarized Text Added


Your max_length is set to 150, but your input_length is only 60. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=30)


68 Summarized Text Added


Your max_length is set to 150, but your input_length is only 66. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)


69 Summarized Text Added


Your max_length is set to 150, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


70 Summarized Text Added


Your max_length is set to 150, but your input_length is only 51. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


71 Summarized Text Added


Your max_length is set to 150, but your input_length is only 45. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=22)


72 Summarized Text Added


Your max_length is set to 150, but your input_length is only 44. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=22)


73 Summarized Text Added


Your max_length is set to 150, but your input_length is only 38. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=19)


74 Summarized Text Added


Your max_length is set to 150, but your input_length is only 46. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=23)


75 Summarized Text Added


Your max_length is set to 150, but your input_length is only 46. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=23)


76 Summarized Text Added


Your max_length is set to 150, but your input_length is only 45. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=22)


77 Summarized Text Added


Your max_length is set to 150, but your input_length is only 41. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=20)


78 Summarized Text Added


Your max_length is set to 150, but your input_length is only 30. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=15)


79 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


80 Summarized Text Added


Your max_length is set to 150, but your input_length is only 40. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=20)


81 Summarized Text Added


Your max_length is set to 150, but your input_length is only 36. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)


82 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


83 Summarized Text Added


Your max_length is set to 150, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


84 Summarized Text Added


Your max_length is set to 150, but your input_length is only 40. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=20)


85 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


86 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


87 Summarized Text Added


Your max_length is set to 150, but your input_length is only 30. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=15)


88 Summarized Text Added


Your max_length is set to 150, but your input_length is only 35. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)


89 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


90 Summarized Text Added


Your max_length is set to 150, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


91 Summarized Text Added


Your max_length is set to 150, but your input_length is only 34. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)


92 Summarized Text Added


Your max_length is set to 150, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


93 Summarized Text Added


Your max_length is set to 150, but your input_length is only 33. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


94 Summarized Text Added


Your max_length is set to 150, but your input_length is only 27. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


95 Summarized Text Added
96 Summarized Text Added


Your max_length is set to 150, but your input_length is only 42. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=21)
Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


97 Summarized Text Added


Your max_length is set to 150, but your input_length is only 26. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


98 Summarized Text Added


Your max_length is set to 150, but your input_length is only 28. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


99 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


100 Summarized Text Added


Your max_length is set to 150, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


101 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


102 Summarized Text Added


Your max_length is set to 150, but your input_length is only 22. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


103 Summarized Text Added


Your max_length is set to 150, but your input_length is only 26. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


104 Summarized Text Added


Your max_length is set to 150, but your input_length is only 25. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)


105 Summarized Text Added


Your max_length is set to 150, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


106 Summarized Text Added


Your max_length is set to 150, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


107 Summarized Text Added


Your max_length is set to 150, but your input_length is only 25. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)


108 Summarized Text Added
109 Summarized Text Added


Your max_length is set to 150, but your input_length is only 60. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=30)
Your max_length is set to 150, but your input_length is only 25. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)


110 Summarized Text Added


Your max_length is set to 150, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


111 Summarized Text Added


Your max_length is set to 150, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


112 Summarized Text Added


Your max_length is set to 150, but your input_length is only 17. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


113 Summarized Text Added


Your max_length is set to 150, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


114 Summarized Text Added


Your max_length is set to 150, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


115 Summarized Text Added


Your max_length is set to 150, but your input_length is only 22. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


116 Summarized Text Added


Your max_length is set to 150, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


117 Summarized Text Added


Your max_length is set to 150, but your input_length is only 25. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)


118 Summarized Text Added


Your max_length is set to 150, but your input_length is only 22. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


119 Summarized Text Added


Your max_length is set to 150, but your input_length is only 17. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


120 Summarized Text Added


Your max_length is set to 150, but your input_length is only 16. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


121 Summarized Text Added


Your max_length is set to 150, but your input_length is only 16. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


122 Summarized Text Added


Your max_length is set to 150, but your input_length is only 16. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


123 Summarized Text Added


Your max_length is set to 150, but your input_length is only 22. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


124 Summarized Text Added


Your max_length is set to 150, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


125 Summarized Text Added


Your max_length is set to 150, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


126 Summarized Text Added


Your max_length is set to 150, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


127 Summarized Text Added


Your max_length is set to 150, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


128 Summarized Text Added


Your max_length is set to 150, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


129 Summarized Text Added


Your max_length is set to 150, but your input_length is only 24. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)


130 Summarized Text Added


Your max_length is set to 150, but your input_length is only 17. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)


131 Summarized Text Added


Your max_length is set to 150, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


132 Summarized Text Added


Your max_length is set to 150, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


133 Summarized Text Added


Your max_length is set to 150, but your input_length is only 7. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=3)


134 Summarized Text Added
135 Summarized Text Added
136 Summarized Text Added
137 Summarized Text Added
138 Summarized Text Added
139 Summarized Text Added


Your max_length is set to 150, but your input_length is only 32. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


140 Summarized Text Added


Your max_length is set to 150, but your input_length is only 149. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=74)


141 Summarized Text Added


Your max_length is set to 150, but your input_length is only 28. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


142 Summarized Text Added


Your max_length is set to 150, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


143 Summarized Text Added


Your max_length is set to 150, but your input_length is only 136. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)


144 Summarized Text Added


Your max_length is set to 150, but your input_length is only 133. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=66)


145 Summarized Text Added


Your max_length is set to 150, but your input_length is only 8. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)


146 Summarized Text Added


In [16]:
df["summarized_texts"]=summarized_texts
df.to_csv("summarized_reviews.csv",index=False)

## 3. Sentiment Analysis Phase

Since its a sentiment analysis task, we will use a pre-trained model from Hugging Face. We will use the siebert/sentiment-roberta-large-english` model for sentiment analysis. also since this analysis is also aspect based analysis, so what i did here, that when the text were passing through the model, I used spacy to extract the important aspects (Adjectives) from texts, which were responsible for the sentiment of the text. with this step i was able to get the sentiment of the text with respect to the aspects present in the text.

In [17]:
summarized_df=pd.read_csv("data\summarized_reviews.csv")
summarized_df.head()

  summarized_df=pd.read_csv("data\summarized_reviews.csv")


Unnamed: 0,Reviewer's Name,Rating (Out of 5),Date Posted (YYYY-MM-DD),text,summarized_texts
0,Brandon,5,2025-05-10,This laptop is a beast and keeps it coolThis l...,This laptop is so much bigger than my ROG STR...
1,JorgeST,5,2025-05-13,"Big, Beastly, PowerhouseGaming has come a long...",This gaming laptop offers (nearly) every bell...
2,JustBeingHonest,5,2025-05-09,Perfect replacement for gaming desktopThe ASUS...,The screen is beautifully bright and sharp . ...
3,ferrari,5,2025-05-09,"The Latest & Greatest packed neatly in an 18"" ...","The latest & Greatest packed neatly in an 18""..."
4,Wraith,5,2025-05-10,Closest thing to a full-sized rig that I've ow...,ROG Strix G18 is the closest to a full-sized ...


In [None]:
sentiment_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english") # An open source HF model

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [None]:
sentiment_model(summarized_df["summarized_texts"][0])

In [None]:
# in this section we tried to extract sentiment from the texts as well as adjectives which are related to nouns from the texts
nlp = spacy.load("en_core_web_sm")

final_list = []

for texts in summarized_df["summarized_texts"].to_list():
    doc = nlp(texts)
    adjectives = []

    # Extract adjectives related to nouns
    for token in doc:
        if token.pos_ == "NOUN": # here we checked for noun
            for child in token.children:
                if child.pos_ == "ADJ":
                    adjectives.append(child.text)

    # Get overall sentiment
    response = sentiment_model(texts)
    overall_sentiment = response[0]["label"] # Extracted sentiment

    final_list.append({
        "Aspects": adjectives[:3],  # extract only first three adjectives
        "Sentiment": overall_sentiment
    })

final_list

In [None]:
summarized_df["Sentiment"]=final_list
df.to_csv("final_csv.csv",index=False)