In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Aspect-based sentiment analysis is a technique used to identify the sentiment of a particular aspect or feature of a product or service. It involves breaking down a text into smaller components, such as sentences or phrases, and then analyzing the sentiment of each component with respect to a specific aspect



Various steps in Aspect based sentiment analysis

- Data Cleaning: Remove unwanted characters, convert to lowercase, remove stop words, and perform lemmatization.

- Tokenization: Split the sentences into individual tokens.

- Parts of Speech Tagging: Assign each token a part of speech tag (noun, verb, adjective, etc.)

- Dependency Parsing: Identify the grammatical relationships between the tokens.

- Aspect Extraction: Identify the aspects of the product that are being talked about in the review.

- Sentiment Analysis: Determine the sentiment polarity (positive, negative, or neutral) of each aspect.



In [27]:
# Import required libraries
import pandas as pd
import numpy as np
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score


In [46]:
def clean_text(text):
    if pd.isna(text):
        return ''
    text = text.lower() # Convert to lowercase
    text = re.sub(r'\d+', '', text) # Remove numbers
    text = text.translate(str.maketrans('', '', string.punctuation)) # Remove punctuation
    return text

In [47]:
# Load the dataset
df = pd.read_csv('/kaggle/input/women-reviews-on-clothes-ecommerce/women reviews on clothes e-commerce.csv')


In [48]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [49]:
# import os
# os.chdir('/usr/share/nltk_data/corpora/')
# !unzip wordnet.zip

Archive:  wordnet.zip
replace wordnet/lexnames? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [52]:
df['clean_text'] = df['Review Text'].apply(clean_text)


In [53]:
df

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,clean_text
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates,absolutely wonderful silky and sexy and comfo...
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses,love this dress its sooo pretty i happened t...
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,i had such high hopes for this dress and reall...
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,i love love love this jumpsuit its fun flirty ...
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,this shirt is very flattering to all due to th...
...,...,...,...,...,...,...,...,...,...,...,...,...
6520,6520,1095,60,This jumper is perfect!,I saw this on line this morning and was hesita...,5,1,1,General,Dresses,Dresses,i saw this on line this morning and was hesita...
6521,6521,903,49,So love! so cute!,I saw these sweaters in the store this fall an...,5,1,1,General Petite,Tops,Fine gauge,i saw these sweaters in the store this fall an...
6522,6522,1033,47,Disappointing,I have purchased many different styles of pilc...,3,0,3,General,Bottoms,Jeans,i have purchased many different styles of pilc...
6523,6523,1020,45,,Love this skirt... perfect length and cut! pil...,5,1,0,General,Bottoms,Skirts,love this skirt perfect length and cut pilcro ...


In [54]:
# Tokenization and Parts of Speech Tagging
def tokenize_and_tag(text):
    tokens = word_tokenize(text) # Split into tokens
    pos_tags = nltk.pos_tag(tokens) # Assign Part of Speech tags
    return pos_tags

df['pos_tags'] = df['clean_text'].apply(tokenize_and_tag)


In [55]:
df

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,clean_text,pos_tags
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates,absolutely wonderful silky and sexy and comfo...,"[(absolutely, RB), (wonderful, JJ), (silky, NN..."
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses,love this dress its sooo pretty i happened t...,"[(love, VB), (this, DT), (dress, NN), (its, PR..."
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,i had such high hopes for this dress and reall...,"[(i, NN), (had, VBD), (such, JJ), (high, JJ), ..."
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,i love love love this jumpsuit its fun flirty ...,"[(i, NN), (love, VBP), (love, NN), (love, NN),..."
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,this shirt is very flattering to all due to th...,"[(this, DT), (shirt, NN), (is, VBZ), (very, RB..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6520,6520,1095,60,This jumper is perfect!,I saw this on line this morning and was hesita...,5,1,1,General,Dresses,Dresses,i saw this on line this morning and was hesita...,"[(i, NN), (saw, VBD), (this, DT), (on, IN), (l..."
6521,6521,903,49,So love! so cute!,I saw these sweaters in the store this fall an...,5,1,1,General Petite,Tops,Fine gauge,i saw these sweaters in the store this fall an...,"[(i, NN), (saw, VBD), (these, DT), (sweaters, ..."
6522,6522,1033,47,Disappointing,I have purchased many different styles of pilc...,3,0,3,General,Bottoms,Jeans,i have purchased many different styles of pilc...,"[(i, NNS), (have, VBP), (purchased, VBN), (man..."
6523,6523,1020,45,,Love this skirt... perfect length and cut! pil...,5,1,0,General,Bottoms,Skirts,love this skirt perfect length and cut pilcro ...,"[(love, VB), (this, DT), (skirt, NN), (perfect..."


In [56]:
# Dependency Parsing (using spaCy)
import spacy
nlp = spacy.load('en_core_web_sm')

def parse_dependency(text):
    doc = nlp(text)
    return [(token.text, token.dep_) for token in doc]

df['dependency'] = df['clean_text'].apply(parse_dependency)


In [57]:
df

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,clean_text,pos_tags,dependency
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates,absolutely wonderful silky and sexy and comfo...,"[(absolutely, RB), (wonderful, JJ), (silky, NN...","[(absolutely, advmod), (wonderful, amod), ( , ..."
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses,love this dress its sooo pretty i happened t...,"[(love, VB), (this, DT), (dress, NN), (its, PR...","[(love, ROOT), (this, det), (dress, dobj), ( ,..."
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,i had such high hopes for this dress and reall...,"[(i, NN), (had, VBD), (such, JJ), (high, JJ), ...","[(i, nsubj), (had, ROOT), (such, amod), (high,..."
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,i love love love this jumpsuit its fun flirty ...,"[(i, NN), (love, VBP), (love, NN), (love, NN),...","[(i, nsubj), (love, ROOT), (love, compound), (..."
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,this shirt is very flattering to all due to th...,"[(this, DT), (shirt, NN), (is, VBZ), (very, RB...","[(this, det), (shirt, nsubj), (is, ROOT), (ver..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6520,6520,1095,60,This jumper is perfect!,I saw this on line this morning and was hesita...,5,1,1,General,Dresses,Dresses,i saw this on line this morning and was hesita...,"[(i, NN), (saw, VBD), (this, DT), (on, IN), (l...","[(i, nsubj), (saw, ROOT), (this, dobj), (on, p..."
6521,6521,903,49,So love! so cute!,I saw these sweaters in the store this fall an...,5,1,1,General Petite,Tops,Fine gauge,i saw these sweaters in the store this fall an...,"[(i, NN), (saw, VBD), (these, DT), (sweaters, ...","[(i, nsubj), (saw, ROOT), (these, det), (sweat..."
6522,6522,1033,47,Disappointing,I have purchased many different styles of pilc...,3,0,3,General,Bottoms,Jeans,i have purchased many different styles of pilc...,"[(i, NNS), (have, VBP), (purchased, VBN), (man...","[(i, nsubj), (have, aux), (purchased, ROOT), (..."
6523,6523,1020,45,,Love this skirt... perfect length and cut! pil...,5,1,0,General,Bottoms,Skirts,love this skirt perfect length and cut pilcro ...,"[(love, VB), (this, DT), (skirt, NN), (perfect...","[(love, nsubj), (this, det), (skirt, nmod), (p..."


In [58]:
# Aspect Extraction
def extract_aspect(text):
    doc = nlp(text)
    aspects = []
    for chunk in doc.noun_chunks:
        if chunk.root.dep_ == 'nsubj' or chunk.root.dep_ == 'dobj':
            aspect = chunk.text.strip()
            if len(aspect.split()) > 1:
                aspects.append(aspect)
    return aspects


In [59]:
df['aspects'] = df['clean_text'].apply(extract_aspect)


In [60]:
df

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,clean_text,pos_tags,dependency,aspects
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates,absolutely wonderful silky and sexy and comfo...,"[(absolutely, RB), (wonderful, JJ), (silky, NN...","[(absolutely, advmod), (wonderful, amod), ( , ...",[]
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses,love this dress its sooo pretty i happened t...,"[(love, VB), (this, DT), (dress, NN), (its, PR...","[(love, ROOT), (this, det), (dress, dobj), ( ,...","[this dress, its sooo, online bc, the length]"
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,i had such high hopes for this dress and reall...,"[(i, NN), (had, VBD), (such, JJ), (high, JJ), ...","[(i, nsubj), (had, ROOT), (such, amod), (high,...","[such high hopes, the petite small my usual si..."
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,i love love love this jumpsuit its fun flirty ...,"[(i, NN), (love, VBP), (love, NN), (love, NN),...","[(i, nsubj), (love, ROOT), (love, compound), (...","[love love, its fun flirty]"
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,this shirt is very flattering to all due to th...,"[(this, DT), (shirt, NN), (is, VBZ), (very, RB...","[(this, det), (shirt, nsubj), (is, ROOT), (ver...","[this shirt, any cardigan, this shirt]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6520,6520,1095,60,This jumper is perfect!,I saw this on line this morning and was hesita...,5,1,1,General,Dresses,Dresses,i saw this on line this morning and was hesita...,"[(i, NN), (saw, VBD), (this, DT), (on, IN), (l...","[(i, nsubj), (saw, ROOT), (this, dobj), (on, p...","[local anthopology store, a size, the size, th..."
6521,6521,903,49,So love! so cute!,I saw these sweaters in the store this fall an...,5,1,1,General Petite,Tops,Fine gauge,i saw these sweaters in the store this fall an...,"[(i, NN), (saw, VBD), (these, DT), (sweaters, ...","[(i, nsubj), (saw, ROOT), (these, det), (sweat...","[these sweaters, a pass, the little pockets, a..."
6522,6522,1033,47,Disappointing,I have purchased many different styles of pilc...,3,0,3,General,Bottoms,Jeans,i have purchased many different styles of pilc...,"[(i, NNS), (have, VBP), (purchased, VBN), (man...","[(i, nsubj), (have, aux), (purchased, ROOT), (...","[many different styles, all these, the fit, a ..."
6523,6523,1020,45,,Love this skirt... perfect length and cut! pil...,5,1,0,General,Bottoms,Skirts,love this skirt perfect length and cut pilcro ...,"[(love, VB), (this, DT), (skirt, NN), (perfect...","[(love, nsubj), (this, det), (skirt, nmod), (p...",[this skirt perfect length]


In [61]:
# Sentiment Analysis (using Multinomial Naive Bayes)
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['clean_text'])
y = df['Rating'].apply(lambda x: 'positive' if x > 3 else 'negative')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = MultinomialNB()

In [62]:
model.fit(X_train, y_train)


In [63]:
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.8674329501915709
              precision    recall  f1-score   support

    negative       0.72      0.65      0.68       286
    positive       0.90      0.93      0.92      1019

    accuracy                           0.87      1305
   macro avg       0.81      0.79      0.80      1305
weighted avg       0.86      0.87      0.86      1305



#### Combine aspects and corresponding sentiments


In [71]:
def extract_sentiment(text):
    doc = nlp(text)
    aspects = []
    for chunk in doc.noun_chunks:
        if chunk.root.dep_ == 'nsubj' or chunk.root.dep_ == 'dobj':
            aspect = chunk.text.strip()
            if len(aspect.split()) > 1:
                aspect_sentiment = model.predict(vectorizer.transform([text]))[0]
                aspects.append((aspect, aspect_sentiment))
    return aspects


In [72]:
df['aspect_sentiment'] = df['clean_text'].apply(extract_sentiment)

In [73]:

df

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,clean_text,pos_tags,dependency,aspects,aspect_sentiment
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates,absolutely wonderful silky and sexy and comfo...,"[(absolutely, RB), (wonderful, JJ), (silky, NN...","[(absolutely, advmod), (wonderful, amod), ( , ...",[],[]
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses,love this dress its sooo pretty i happened t...,"[(love, VB), (this, DT), (dress, NN), (its, PR...","[(love, ROOT), (this, det), (dress, dobj), ( ,...","[this dress, its sooo, online bc, the length]","[(this dress, positive), (its sooo, positive),..."
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,i had such high hopes for this dress and reall...,"[(i, NN), (had, VBD), (such, JJ), (high, JJ), ...","[(i, nsubj), (had, ROOT), (such, amod), (high,...","[such high hopes, the petite small my usual si...","[(such high hopes, negative), (the petite smal..."
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,i love love love this jumpsuit its fun flirty ...,"[(i, NN), (love, VBP), (love, NN), (love, NN),...","[(i, nsubj), (love, ROOT), (love, compound), (...","[love love, its fun flirty]","[(love love, positive), (its fun flirty, posit..."
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,this shirt is very flattering to all due to th...,"[(this, DT), (shirt, NN), (is, VBZ), (very, RB...","[(this, det), (shirt, nsubj), (is, ROOT), (ver...","[this shirt, any cardigan, this shirt]","[(this shirt, positive), (any cardigan, positi..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6520,6520,1095,60,This jumper is perfect!,I saw this on line this morning and was hesita...,5,1,1,General,Dresses,Dresses,i saw this on line this morning and was hesita...,"[(i, NN), (saw, VBD), (this, DT), (on, IN), (l...","[(i, nsubj), (saw, ROOT), (this, dobj), (on, p...","[local anthopology store, a size, the size, th...","[(local anthopology store, positive), (a size,..."
6521,6521,903,49,So love! so cute!,I saw these sweaters in the store this fall an...,5,1,1,General Petite,Tops,Fine gauge,i saw these sweaters in the store this fall an...,"[(i, NN), (saw, VBD), (these, DT), (sweaters, ...","[(i, nsubj), (saw, ROOT), (these, det), (sweat...","[these sweaters, a pass, the little pockets, a...","[(these sweaters, positive), (a pass, positive..."
6522,6522,1033,47,Disappointing,I have purchased many different styles of pilc...,3,0,3,General,Bottoms,Jeans,i have purchased many different styles of pilc...,"[(i, NNS), (have, VBP), (purchased, VBN), (man...","[(i, nsubj), (have, aux), (purchased, ROOT), (...","[many different styles, all these, the fit, a ...","[(many different styles, negative), (all these..."
6523,6523,1020,45,,Love this skirt... perfect length and cut! pil...,5,1,0,General,Bottoms,Skirts,love this skirt perfect length and cut pilcro ...,"[(love, VB), (this, DT), (skirt, NN), (perfect...","[(love, nsubj), (this, det), (skirt, nmod), (p...",[this skirt perfect length],"[(this skirt perfect length, positive)]"


In [74]:
df.to_csv('aspect_sentiment.csv', index=False)
