In [None]:
# Import necessary libraries
import pandas as pd
import spacy
from nltk.sentiment import SentimentIntensityAnalyzer

# Load the women's clothing e-commerce reviews dataset from a CSV file.
# NOTE(review): the absolute '/content/...' path looks like a Colab upload
# location — adjust it when running in another environment.
data = pd.read_csv('/content/Womens Clothing E-Commerce Reviews.csv')
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses


In [None]:
# Initialize spaCy's small English pipeline ("en_core_web_sm"); it is the
# module-level `nlp` object that extract_entities calls for NER.
# NOTE(review): the model package must already be installed in the kernel's
# environment for spacy.load to succeed — confirm.
nlp = spacy.load("en_core_web_sm")

# Function to perform NER and extract entities
def extract_entities(text):
    """Run the module-level spaCy pipeline on ``text`` and collect its entities.

    Returns a list of ``(entity_text, entity_label)`` tuples, one for each
    entry in ``doc.ents``; the list is empty when no entity was recognised.
    """
    entities = []
    for entity in nlp(text).ents:
        entities.append((entity.text, entity.label_))
    return entities

# Apply NER to the review texts.
# Rows with a missing 'Review Text' are dropped before the apply, so
# extract_entities only receives non-null values. The column assignment aligns
# on the index, so those dropped rows presumably end up as NaN in 'Entities'
# — verify if downstream code expects a list in every row.
data['Entities'] = data['Review Text'].dropna().apply(extract_entities)



In [None]:
# Inspect the extracted entities column (one list of (text, label) tuples per review).
data['Entities']

0                           []
1        [(Love, WORK_OF_ART)]
2           [(half, CARDINAL)]
3                           []
4                           []
                 ...          
23481                       []
23482        [(one, CARDINAL)]
23483                       []
23484    [(this summer, DATE)]
23485                       []
Name: Entities, Length: 23486, dtype: object

In [None]:
import nltk
# Download the 'vader_lexicon' NLTK resource before the sentiment analyzer is
# created in the next cell.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# Initialize the sentiment analyzer once; analyze_sentiment reads this
# module-level `sia` object on every call.
sia = SentimentIntensityAnalyzer()

# Function to analyze sentiment of the review
def analyze_sentiment(text):
    """Score ``text`` with the module-level analyzer ``sia``.

    Returns the result of ``sia.polarity_scores(text)`` unchanged.
    """
    return sia.polarity_scores(text)

# Apply sentiment analysis to the review texts (rows with a missing
# 'Review Text' are dropped before scoring).
data['Sentiment'] = data['Review Text'].dropna().apply(analyze_sentiment)

# Show the resulting column; each entry is the score dictionary for one review.
data['Sentiment']

0        {'neg': 0.0, 'neu': 0.272, 'pos': 0.728, 'comp...
1        {'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp...
2        {'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'co...
3        {'neg': 0.226, 'neu': 0.34, 'pos': 0.434, 'com...
4        {'neg': 0.0, 'neu': 0.7, 'pos': 0.3, 'compound...
                               ...                        
23481    {'neg': 0.065, 'neu': 0.547, 'pos': 0.388, 'co...
23482    {'neg': 0.068, 'neu': 0.735, 'pos': 0.197, 'co...
23483    {'neg': 0.0, 'neu': 0.704, 'pos': 0.296, 'comp...
23484    {'neg': 0.068, 'neu': 0.812, 'pos': 0.12, 'com...
23485    {'neg': 0.0, 'neu': 0.495, 'pos': 0.505, 'comp...
Name: Sentiment, Length: 23486, dtype: object

In [None]:
# Define a function for custom entity identification using a dictionary
def custom_entity_identification(text, entity_dict):
    """Find dictionary terms that occur in ``text`` (case-insensitive).

    Args:
        text: The text to search. Non-string values (e.g. ``None`` or the NaN
            of a missing review) are treated as containing no entities.
        entity_dict: Mapping of category name -> iterable of candidate terms.

    Returns:
        dict mapping each category that had at least one hit to the list of
        matching terms, in their original spelling and order. Categories
        without a hit are omitted.

    Note:
        Matching is a plain substring test, so a term also matches when it
        appears inside a longer word.
    """
    # Missing reviews arrive as None/NaN; there is nothing to match against.
    if not isinstance(text, str):
        return {}

    # Lower-case the text once instead of once per candidate term.
    haystack = text.lower()

    identified_entities = {}
    for key, values in entity_dict.items():
        matches = [value for value in values if value.lower() in haystack]
        if matches:
            identified_entities[key] = matches
    return identified_entities

# Define custom entities dictionary: category name -> list of terms to look for.
# NOTE(review): 'BrandA'/'ProductX' etc. look like placeholder values — replace
# them with the real brand and product names to be detected.
custom_entities_dict = {
    'brands': ['BrandA', 'BrandB'],
    'product_names': ['ProductX', 'ProductY']
}

# Tag every non-missing review with the custom entities found in it
data['Custom Entities'] = (
    data['Review Text']
    .dropna()
    .apply(custom_entity_identification, args=(custom_entities_dict,))
)

# Function to simulate entity linking based on Clothing ID
def link_entities_by_id(clothing_id, database):
    """Look up ``clothing_id`` in ``database`` through its ``.get`` method.

    Returns the stored entry, or a fixed fallback message when the lookup
    finds nothing for that id.
    """
    fallback = "No additional information available."
    linked_info = database.get(clothing_id, fallback)
    return linked_info

# Simulate a database linking Clothing IDs to product details.
# Keys are integer Clothing IDs and values are short free-text descriptions;
# only these five IDs are covered.
clothing_database = {
    767: "Silky intimate wear",
    1080: "Elegant summer dress",
    1077: "Floral print dress",
    1049: "Casual jumpsuit",
    847: "Comfortable cotton shirt"
}

# Link entities based on Clothing ID.
# The lookup table must be `clothing_database`: passing the 'Class Name' column
# made `.get` search the row index labels, so each row received the class name
# of whichever row's index happened to equal its Clothing ID.
data['Linked Entities'] = data['Clothing ID'].map(lambda x: link_entities_by_id(x, clothing_database))

# Display the first few rows of the dataframe to check results.
# As the last expression of the cell this renders as a rich table, instead of
# the wrapped plain-text dump that print() produces for a wide frame.
data[['Review Text', 'Entities', 'Sentiment', 'Custom Entities', 'Linked Entities']].head()

                                         Review Text               Entities  \
0  Absolutely wonderful - silky and sexy and comf...                     []   
1  Love this dress!  it's sooo pretty.  i happene...  [(Love, WORK_OF_ART)]   
2  I had such high hopes for this dress and reall...     [(half, CARDINAL)]   
3  I love, love, love this jumpsuit. it's fun, fl...                     []   
4  This shirt is very flattering to all due to th...                     []   

                                           Sentiment Custom Entities  \
0  {'neg': 0.0, 'neu': 0.272, 'pos': 0.728, 'comp...              {}   
1  {'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp...              {}   
2  {'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'co...              {}   
3  {'neg': 0.226, 'neu': 0.34, 'pos': 0.434, 'com...              {}   
4  {'neg': 0.0, 'neu': 0.7, 'pos': 0.3, 'compound...              {}   

  Linked Entities  
0         Dresses  
1         Blouses  
2         Blouses  
3        Swe

In [None]:
# Show the DataFrame including the newly added columns (the rendered output
# abbreviates the middle rows).
data

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,Entities,Sentiment,Custom Entities,Linked Entities
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates,[],"{'neg': 0.0, 'neu': 0.272, 'pos': 0.728, 'comp...",{},Dresses
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses,"[(Love, WORK_OF_ART)]","{'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp...",{},Blouses
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses,"[(half, CARDINAL)]","{'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'co...",{},Blouses
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants,[],"{'neg': 0.226, 'neu': 0.34, 'pos': 0.434, 'com...",{},Sweaters
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses,[],"{'neg': 0.0, 'neu': 0.7, 'pos': 0.3, 'compound...",{},Pants
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23481,23481,1104,34,Great dress for many occasions,I was very happy to snag this dress at such a ...,5,1,0,General Petite,Dresses,Dresses,[],"{'neg': 0.065, 'neu': 0.547, 'pos': 0.388, 'co...",{},Blouses
23482,23482,862,48,Wish it was made of cotton,"It reminds me of maternity clothes. soft, stre...",3,1,0,General Petite,Tops,Knits,"[(one, CARDINAL)]","{'neg': 0.068, 'neu': 0.735, 'pos': 0.197, 'co...",{},Jackets
23483,23483,1104,31,"Cute, but see through","This fit well, but the top was very see throug...",3,0,1,General Petite,Dresses,Dresses,[],"{'neg': 0.0, 'neu': 0.704, 'pos': 0.296, 'comp...",{},Blouses
23484,23484,1084,28,"Very cute dress, perfect for summer parties an...",I bought this dress for a wedding i have this ...,3,1,2,General,Dresses,Dresses,"[(this summer, DATE)]","{'neg': 0.068, 'neu': 0.812, 'pos': 0.12, 'com...",{},Dresses
