In [None]:
!pip install transformers spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Read the cleaned review dataset and show which columns it provides.
import pandas as pd
df = pd.read_csv(filepath_or_buffer='cleaned_reviews.csv')
print(df.columns)

Index(['sentiments', 'cleaned_review', 'cleaned_review_length',
       'review_score'],
      dtype='object')


In [None]:
#Sentiment analysis
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# 1. Setup
# Reviews: replace missing review text with "" so every row holds a string.
df = pd.read_csv('cleaned_reviews.csv')
df['cleaned_review'] = df['cleaned_review'].fillna("")
# Analyzer: fetch the VADER lexicon, then build the sentiment analyzer.
nltk.download('vader_lexicon') # Run this once in your notebook
sia = SentimentIntensityAnalyzer()
def get_vader_sentiment(text):
    """Label ``text`` as "Positive", "Negative" or "Neutral" using VADER.

    The label is derived from the ``compound`` value returned by
    ``sia.polarity_scores``: at or above 0.05 is positive, at or below
    -0.05 is negative, anything in between is neutral.
    """
    compound = sia.polarity_scores(text)['compound']
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"
# Take an explicit copy of the first 100 rows: assigning a new column to the
# bare result of head() raises SettingWithCopyWarning and may not behave as
# intended because the slice can still be tied to `df`.
df_sentiment = df.head(100).copy()
df_sentiment['ai_sentiment'] = df_sentiment['cleaned_review'].apply(get_vader_sentiment)
print(df_sentiment[['cleaned_review', 'ai_sentiment']].head())

                                      cleaned_review ai_sentiment
0  i wish would have gotten one earlier love it a...     Positive
1  i ve learned this lesson again open the packag...     Positive
2          it is so slow and lags find better option      Neutral
3  roller ball stopped working within months of m...     Negative
4  i like the color and size but it few days out ...     Positive


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sentiment['ai_sentiment'] = df_sentiment['cleaned_review'].apply(get_vader_sentiment)


In [None]:
#NER
import pandas as pd
import spacy
# Reviews: replace missing review text with "" so every row holds a string.
df = pd.read_csv('cleaned_reviews.csv')
df['cleaned_review'] = df['cleaned_review'].fillna("")
# Small English spaCy pipeline used for entity recognition below.
nlp = spacy.load("en_core_web_sm")
# 2. Intermediate NER Function
def get_spacy_entities(text):
    """Return the named entities found in ``text`` as one display string.

    Only entities labelled ORG, PERSON, GPE, DATE or PRODUCT are kept. Each
    one is rendered as ``"<entity text> (<LABEL>)"``; duplicates are dropped
    and the remainder is joined with ", " in order of first appearance.

    Args:
        text: Review text to analyse. A falsy value (e.g. "" or None) is not
            passed to the spaCy pipeline.

    Returns:
        "No entities" when ``text`` is falsy, "None found" when the pipeline
        yields no entity with an interesting label, otherwise the joined
        entity string.
    """
    if not text:
        return "No entities"
    interesting_labels = {'ORG', 'PERSON', 'GPE', 'DATE', 'PRODUCT'}
    doc = nlp(text)
    entities = [f"{ent.text} ({ent.label_})" for ent in doc.ents if ent.label_ in interesting_labels]
    # dict.fromkeys de-duplicates while keeping first-seen order. A set() has
    # no stable order for strings across interpreter runs (hash randomization),
    # so the joined output would change between kernel restarts.
    unique_entities = list(dict.fromkeys(entities))
    return ", ".join(unique_entities) if unique_entities else "None found"
# Run entity extraction on an independent copy of the first 50 reviews.
df_ner = df.head(50).copy()
print("Starting NER extraction...")
df_ner.loc[:, 'entities'] = df_ner['cleaned_review'].apply(get_spacy_entities)

# Show the review text next to the entities extracted from it.
print("\n--- NER RESULTS ---")
print(df_ner[['cleaned_review', 'entities']].head(10))

Starting NER extraction...

--- NER RESULTS ---
                                      cleaned_review  \
0  i wish would have gotten one earlier love it a...   
1  i ve learned this lesson again open the packag...   
2          it is so slow and lags find better option   
3  roller ball stopped working within months of m...   
4  i like the color and size but it few days out ...   
5  overall love this mouse the size weight clicki...   
6                                 it stopped working   
7  my son uses school issued chromebook for schoo...   
8  loved this cute little mouse but it broke afte...   
9  should ve spent the money to get quality produ...   

                                            entities  
0                                         None found  
1  couple of weeks (DATE), august (DATE), mid sep...  
2                                         None found  
3                                      months (DATE)  
4                                         None found  
5    

In [None]:
#SUMMARIZATION
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Checkpoint name shared by the tokenizer and the seq2seq model.
model_name = "sshleifer/distilbart-cnn-12-6"
print("Loading model... this might take a moment.")
# Load the tokenizer first, then the model weights, from the same checkpoint.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForSeq2SeqLM.from_pretrained(model_name),
)
def get_ai_summary(text, min_words=20, max_length=40, min_length=10, num_beams=4):
    """Summarize one review with the module-level ``tokenizer`` and ``model``.

    Args:
        text: Review text. Missing values (None/NaN) are treated as too short.
            Any other value is converted with ``str()`` before use.
        min_words: Reviews with fewer whitespace-separated words than this are
            not summarized.
        max_length: ``max_length`` passed to ``model.generate``.
        min_length: ``min_length`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The decoded summary with surrounding whitespace removed,
        "Review too short to summarize." for missing or short input, or
        "Error: <message>" when tokenization or generation raises.
    """
    if pd.isna(text):
        return "Review too short to summarize."
    # Normalize once so the length check and the tokenizer see the same string.
    text = str(text)
    if len(text.split()) < min_words:
        return "Review too short to summarize."
    try:
        # Inputs longer than 1024 tokens are truncated by the tokenizer.
        inputs = tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)
        # Pass the attention mask explicitly instead of letting generate()
        # infer it from the input ids.
        summary_ids = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            num_beams=num_beams,
            max_length=max_length,
            min_length=min_length,
            early_stopping=True
        )
        # The decoded text can start with a space; strip it so the column is clean.
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True).strip()
    except Exception as e:
        # Deliberate best-effort: one failing review is reported in its own
        # row instead of aborting the whole DataFrame.apply run.
        return f"Error: {str(e)}"

Loading model... this might take a moment.


Loading weights:   0%|          | 0/358 [00:00<?, ?it/s]



In [None]:
# Reload the reviews and summarize an independent copy of the first 10 rows.
df = pd.read_csv('cleaned_reviews.csv')
sample_df = df.head(10).copy()
print("Summarizing reviews...")
sample_df.loc[:, 'ai_summary'] = sample_df['cleaned_review'].apply(get_ai_summary)
# Show each review next to its generated summary.
print("\n--- SUMMARIZATION RESULTS ---")
print(sample_df[['cleaned_review', 'ai_summary']].head())

Summarizing reviews...

--- SUMMARIZATION RESULTS ---
                                      cleaned_review  \
0  i wish would have gotten one earlier love it a...   
1  i ve learned this lesson again open the packag...   
2          it is so slow and lags find better option   
3  roller ball stopped working within months of m...   
4  i like the color and size but it few days out ...   

                                          ai_summary  
0                     Review too short to summarize.  
1   The optical light will flash once when turned...  
2                     Review too short to summarize.  
3                     Review too short to summarize.  
4  i like the color and size but it few days out ...  
