# Task 5

In [None]:
import nltk
import pandas as pd
import numpy as np
from nltk.corpus import wordnet, stopwords, opinion_lexicon
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [37]:
# Target attributes: each key is an aspect of a review, each value the descriptor
# keywords that signal the review is talking about that aspect.
attributes = dict(
    [
        (
            "price",
            [
                "affordable", "expensive", "reasonable", "cheap", "overpriced",
                "budget-friendly", "cost-effective", "pricey", "inexpensive",
                "value for money", "price tag", "pricy", "wallet-friendly",
                "economical", "price point", "exorbitantly",
            ],
        ),
        (
            "food",
            [
                "delicious", "tasty", "flavorful", "savory", "yummy",
                "delectable", "mouthwatering", "palatable", "satisfying",
                "tasty dishes", "culinary experience", "taste buds",
                "appetizing", "gourmet", "scrumptious", "gourmand", "great",
                "fresh", "good", "tasteless",
            ],
        ),
        (
            "location",
            [
                "central", "convenient", "far", "noisy", "quiet", "serene",
                "scenic", "urban", "suburban", "remote", "bustling",
                "peaceful", "picturesque", "rural", "accessible", "secluded",
                "cool", "suggest", "good",
            ],
        ),
        (
            "service",
            [
                "hostile", "friendly", "efficient", "attentive", "prompt",
                "courteous", "responsive", "helpful", "accommodating",
                "welcoming", "professional", "impersonal", "slow",
                "inattentive", "rude", "lackluster", "polite", "packaging",
                "delivery", "stupid",
            ],
        ),
    ]
)

In [4]:
# Define negation words.
# These tokens flip the polarity of the sentiment word that follows them and are
# therefore kept when stop words are removed.
# "n't" is the token word_tokenize emits for contractions ("wasn't" -> "was", "n't");
# "nt" is kept for text whose apostrophes were stripped before tokenisation.
negation_words = ["not", "none", "nt", "n't", "no", "never"]

In [5]:
def preprocess_review(review):
    """Tokenize a review and attach a part-of-speech tag to every token.

    Args:
        review: The review text. Non-string values (for example ``None`` or the
            float NaN that pandas yields for an empty CSV cell) and blank
            strings are treated as a review with no tokens.

    Returns:
        A list of ``(token, pos_tag)`` pairs as produced by ``pos_tag``; an
        empty list when there is no text to tokenize.
    """
    # word_tokenize raises on non-string input, so missing reviews are short-circuited
    # here instead of aborting a whole batch run.
    if not isinstance(review, str) or not review.strip():
        return []

    tokens = word_tokenize(review)
    return pos_tag(tokens)

In [6]:
def remove_stopwords(tagged_tokens):
    """Drop English stop words from tagged tokens while keeping negation words.

    Args:
        tagged_tokens: Iterable of ``(token, pos_tag)`` pairs.

    Returns:
        A list of the ``(token, pos_tag)`` pairs whose lower-cased token is not
        a stop word. Words listed in ``negation_words`` are never treated as
        stop words, so they survive the filter.
    """
    protected = set(negation_words)
    droppable = {word for word in stopwords.words('english') if word not in protected}

    kept = []
    for word, tag in tagged_tokens:
        if word.lower() in droppable:
            continue
        kept.append((word, tag))
    return kept

In [34]:
# POS tags of the adjective/adverb tokens that may describe an attribute.
_DESCRIPTOR_TAGS = frozenset(['JJ', 'JJR', 'JJS', 'RB', 'RBR', 'RBS'])

# Lazily filled cache of the opinion lexicon, so every membership test is a set
# look-up rather than a scan of the lexicon's word list.
_OPINION_WORDS = {}


def _opinion_word_sets():
    """Return ``(positive, negative)`` opinion-lexicon words as sets, built on first use."""
    if not _OPINION_WORDS:
        _OPINION_WORDS["positive"] = frozenset(opinion_lexicon.positive())
        _OPINION_WORDS["negative"] = frozenset(opinion_lexicon.negative())
    return _OPINION_WORDS["positive"], _OPINION_WORDS["negative"]


def _wordnet_related_names(word):
    """Return the lower-cased WordNet synonym and antonym lemma names of ``word``."""
    related = set()
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            related.add(lemma.name().lower())
            # Antonyms hang off each individual lemma, so they are gathered inside this loop.
            for antonym in lemma.antonyms():
                related.add(antonym.name().lower())
    return related


def _review_sentiment_score(tagged_tokens):
    """Sum +1/-1 over opinion words, flipping the sign after a negation word."""
    positive_words, negative_words = _opinion_word_sets()
    negations = {word.lower() for word in negation_words}

    score = 0
    previous_word = None  # the first token has no predecessor, hence no negation
    for token, _ in tagged_tokens:
        word = token.lower()
        if word in positive_words:
            polarity = 1
        elif word in negative_words:
            polarity = -1
        else:
            polarity = 0
        if previous_word in negations:
            polarity = -polarity
        score += polarity
        previous_word = word
    return score


def extract_attributes(tagged_tokens):
    """Assign a sentiment polarity to every target attribute of one review.

    An attribute counts as mentioned when some adjective/adverb token is one of
    its keywords, or has a WordNet synonym or antonym among its keywords.
    Sentiment is a single review-wide score: +1 for each opinion-lexicon
    positive token and -1 for each negative one, with the sign flipped when the
    preceding token is a negation word. Matching is case-insensitive and covers
    every token, including the first.

    NOTE: the score is not restricted to words near the attribute mention, so
    all mentioned attributes of a review receive the same polarity.

    Args:
        tagged_tokens: Sequence of ``(token, pos_tag)`` pairs.

    Returns:
        A dict mapping each key of ``attributes`` to ``{"sentiment": value}``,
        where value is the string ``"1"``, ``"-1"`` or ``"0"``. Attributes that
        are not mentioned always get ``"0"``.
    """
    # The score does not depend on the attribute, so it is computed once up front.
    score = _review_sentiment_score(tagged_tokens)
    if score > 0:
        review_polarity = "1"
    elif score < 0:
        review_polarity = "-1"
    else:
        review_polarity = "0"

    descriptors = {
        token.lower() for token, pos in tagged_tokens if pos in _DESCRIPTOR_TAGS
    }

    # WordNet look-ups are shared across attributes instead of repeated for each one.
    related_by_word = {}

    def related_names(word):
        if word not in related_by_word:
            related_by_word[word] = _wordnet_related_names(word)
        return related_by_word[word]

    extracted_attributes = {}
    for attribute, attribute_keywords in attributes.items():
        keywords = set(attribute_keywords)
        # Cheap direct keyword hit first; WordNet expansion only when that fails.
        mentioned = not keywords.isdisjoint(descriptors) or any(
            not keywords.isdisjoint(related_names(word)) for word in descriptors
        )
        extracted_attributes[attribute] = {
            "sentiment": review_polarity if mentioned else "0"
        }
    return extracted_attributes

In [8]:
def print_extracted_attributes(extracted_attributes):
    """Print one ``Name: (Sentiment: value)`` line per extracted attribute.

    Args:
        extracted_attributes: Mapping of attribute name to a dict holding a
            ``"sentiment"`` entry.
    """
    for name in extracted_attributes:
        polarity = extracted_attributes[name]["sentiment"]
        label = name.capitalize()
        print(f"{label}: (Sentiment: {polarity})")

In [39]:
# Sample user review
user_review = (
    "I was thoroughly disappointed with my recent visit to Cafe Five. "
    "The food was mediocre at best, and the prices were outrageously high. "
    "It felt like I was paying a premium for subpar, tasteless dishes."
)

# Run the pipeline step by step: tokenize and tag, filter stop words,
# score the attributes, then print one line per attribute.
tagged_tokens = preprocess_review(user_review)
filtered_tokens = remove_stopwords(tagged_tokens)
attributes_data = extract_attributes(filtered_tokens)
print_extracted_attributes(attributes_data)

Price: (Sentiment: 0)
Food: (Sentiment: -1)
Location: (Sentiment: -1)
Service: (Sentiment: 0)


In [31]:
# Load the reviews to score; the polarity cell further down iterates over the 'Review' column.
# NOTE(review): '/path to/...' looks like a placeholder — point it at the real CSV before running.
df = pd.read_csv('/path to/cleanedWithAll.csv')

In [47]:
#write polarity to CSV
# Output column written for each attribute key returned by extract_attributes.
polarity_columns = {
    'price': 'pricePolarity',
    'food': 'foodPolarity',
    'location': 'locationPolarity',
    'service': 'servicePolarity',
}
polarities = {attribute: [] for attribute in polarity_columns}

for review in df['Review']:
    # Empty CSV cells are read as float NaN, which the tokenizer cannot handle;
    # score them as empty text so one missing review does not abort the run.
    review_text = review if isinstance(review, str) else ''
    tagged_tokens = preprocess_review(review_text)
    filtered_tokens = remove_stopwords(tagged_tokens)
    attributes_data = extract_attributes(filtered_tokens)
    # Look each result up by attribute name rather than by its position in the dict.
    for attribute, values in polarities.items():
        values.append(attributes_data[attribute]['sentiment'])

for attribute, column in polarity_columns.items():
    df[column] = polarities[attribute]

df.to_csv(r'/path to/dataWithPolarities.csv',index=False, sep=',', encoding='utf-8')