In [1]:
!pip install nltk textblob spacy pandas matplotlib seaborn



In [2]:
import pandas as pd
import nltk
import spacy
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:
from google.colab import files
uploaded = files.upload()

import pandas as pd
import io

df = pd.read_csv(io.BytesIO(uploaded['sample_reviews.csv']))
df.head()

Saving sample_reviews.csv to sample_reviews.csv


Unnamed: 0,review_id,review_text
0,1,The product quality is excellent but delivery ...
1,2,Fast delivery and great packaging. Product is ...
2,3,Very poor customer service and the item was da...
3,4,Loved the quality and the price was affordable.
4,5,"Packaging was terrible, but the delivery was o..."


In [7]:
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')     # Needed for lemmatization
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [9]:
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download required resources again (excluding punkt)
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def simple_preprocess(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    words = text.split()  # basic tokenization
    words = [lemmatizer.lemmatize(w) for w in words if w not in stop_words]
    return " ".join(words)

df['cleaned_review'] = df['review_text'].apply(simple_preprocess)
df[['review_text', 'cleaned_review']]

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Unnamed: 0,review_text,cleaned_review
0,The product quality is excellent but delivery ...,product quality excellent delivery delayed
1,Fast delivery and great packaging. Product is ...,fast delivery great packaging product decent
2,Very poor customer service and the item was da...,poor customer service item damaged
3,Loved the quality and the price was affordable.,loved quality price affordable
4,"Packaging was terrible, but the delivery was o...",packaging terrible delivery time


In [10]:
# Define some common aspects
aspect_keywords = ['quality', 'delivery', 'packaging', 'price', 'service']

# Function to find mentioned aspects in each review
def extract_aspects(text):
    return [word for word in aspect_keywords if word in text]

df['aspects'] = df['cleaned_review'].apply(extract_aspects)
df[['cleaned_review', 'aspects']]

Unnamed: 0,cleaned_review,aspects
0,product quality excellent delivery delayed,"[quality, delivery]"
1,fast delivery great packaging product decent,"[delivery, packaging]"
2,poor customer service item damaged,[service]
3,loved quality price affordable,"[quality, price]"
4,packaging terrible delivery time,"[delivery, packaging]"


In [12]:
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


In [13]:
from textblob import TextBlob

def get_aspect_sentiments(row):
    aspects = row['aspects']
    text = row['review_text']
    blob = TextBlob(text)

    aspect_sentiments = {}
    for aspect in aspects:
        sentences = [str(s) for s in blob.sentences if aspect in s.lower()]
        if sentences:
            sentiment = TextBlob(' '.join(sentences)).sentiment.polarity
            aspect_sentiments[aspect] = sentiment
    return aspect_sentiments

df['aspect_sentiments'] = df.apply(get_aspect_sentiments, axis=1)
df[['review_text', 'aspects', 'aspect_sentiments']]

Unnamed: 0,review_text,aspects,aspect_sentiments
0,The product quality is excellent but delivery ...,"[quality, delivery]","{'quality': 1.0, 'delivery': 1.0}"
1,Fast delivery and great packaging. Product is ...,"[delivery, packaging]","{'delivery': 0.5, 'packaging': 0.5}"
2,Very poor customer service and the item was da...,[service],{'service': -0.52}
3,Loved the quality and the price was affordable.,"[quality, price]","{'quality': 0.7, 'price': 0.7}"
4,"Packaging was terrible, but the delivery was o...","[delivery, packaging]","{'delivery': -1.0, 'packaging': -1.0}"
