In [11]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.tag import pos_tag


In [12]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dakshayeni\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Dakshayeni\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [13]:
def loadLexicon(fname):
    newLex=set()
    lex_conn=open(fname)
    
    #add every word in the file to the set
    for line in lex_conn:
        newLex.add(line.strip())# remember to strip to remove the lin-change character
    lex_conn.close()

    return newLex


In [14]:
def analyze_review(review):
    positive_words = loadLexicon('positive-words.txt')
    negative_words = loadLexicon('negative-words.txt')

    sentiment_dict = {}
    sentences = sent_tokenize(review)
    for sentence in sentences:
        words = word_tokenize(sentence)
        tagged_words = pos_tag(words)
        for word, tag in tagged_words:
            if tag.startswith('NN') and len(word) >= 3:  # Noun
                P = sum(1 for w in words if w in positive_words)
                N = sum(1 for w in words if w in negative_words)
                sentiment_dict[word] = sentiment_dict.get(word, {'positive': 0, 'negative': 0})
                if P > N:
                    sentiment_dict[word]['positive'] += 1  # Positive sentiment
                elif P < N:
                    sentiment_dict[word]['negative'] += 1  # Negative sentiment
    return sentiment_dict

In [15]:
df_product1 = pd.read_csv('beats_solo3_amazon_reviews.csv')
df_product2 = pd.read_csv('sony_amazon_reviews.csv')

In [16]:
def aggregate_sentiments(reviews):
    aggregated_sentiments = {}
    for review in reviews:
        review_sentiments = analyze_review(review)
        for noun, sentiment in review_sentiments.items():
            if noun not in aggregated_sentiments:
                aggregated_sentiments[noun] = {'positive': 0, 'negative': 0}
            aggregated_sentiments[noun]['positive'] += sentiment['positive']
            aggregated_sentiments[noun]['negative'] += sentiment['negative']
    return aggregated_sentiments


In [17]:
aggregated_product1 = aggregate_sentiments(df_product1['review'])
aggregated_product2 = aggregate_sentiments(df_product2['review'])

In [18]:
print(aggregated_product1)


{'Beats': {'positive': 57, 'negative': 19}, 'Cordless': {'positive': 3, 'negative': 0}, 'Headphones': {'positive': 16, 'negative': 1}, 'expectations': {'positive': 2, 'negative': 0}, 'terms': {'positive': 1, 'negative': 1}, 'convenience': {'positive': 8, 'negative': 0}, 'functionality': {'positive': 3, 'negative': 0}, 'experience': {'positive': 10, 'negative': 2}, 'quality': {'positive': 91, 'negative': 11}, 'life': {'positive': 37, 'negative': 3}, 'standout': {'positive': 2, 'negative': 0}, 'features': {'positive': 7, 'negative': 0}, 'headphones': {'positive': 187, 'negative': 50}, 'design': {'positive': 19, 'negative': 3}, 'bag': {'positive': 4, 'negative': 1}, 'pocket': {'positive': 0, 'negative': 1}, 'hassle': {'positive': 0, 'negative': 1}, 'portability': {'positive': 0, 'negative': 0}, 'times': {'positive': 1, 'negative': 4}, 'opportunity': {'positive': 0, 'negative': 1}, 'material': {'positive': 1, 'negative': 2}, 'connection': {'positive': 11, 'negative': 9}, 'breeze': {'positi

In [19]:
common_nouns = set(aggregated_product1.keys()) & set(aggregated_product2.keys())
opposite_sentiments_nouns = []

for noun in common_nouns:
    sentiment_product1 = aggregated_product1[noun]
    sentiment_product2 = aggregated_product2[noun]

    if (sentiment_product1['positive'] > sentiment_product1['negative'] and sentiment_product2['positive'] < sentiment_product2['negative']) or \
       (sentiment_product1['positive'] < sentiment_product1['negative'] and sentiment_product2['positive'] > sentiment_product2['negative']):
        opposite_sentiments_nouns.append(noun)

def calculate_overall_sentiment(aggregated_sentiments):
    total_positive = sum(sentiments['positive'] for sentiments in aggregated_sentiments.values())
    total_negative = sum(sentiments['negative'] for sentiments in aggregated_sentiments.values())
    return total_positive - total_negative

overall_sentiment_product1 = calculate_overall_sentiment(aggregated_product1)
overall_sentiment_product2 = calculate_overall_sentiment(aggregated_product2)
better_product = "Beats Solo 3" if overall_sentiment_product1 > overall_sentiment_product2 else "Sony"

def top_positive(product):
    keys_list=[]
    top_positive_keys = sorted(product.items(), key=lambda x: x[1]['positive'], reverse=True)[:10]
    for key, value in top_positive_keys:
        keys_list.append(key)

    return keys_list

def top_negative(product):
    keys_list=[]
    top_negative_keys = sorted(product.items(), key=lambda x: x[1]['negative'], reverse=True)[:10]
    for key, value in top_negative_keys:
         keys_list.append(key)

    return keys_list

pos_product1=top_positive(aggregated_product1)
pos_product2=top_positive(aggregated_product2)
neg_product1=top_negative(aggregated_product1)
neg_product2=top_negative(aggregated_product2)

with open("report.txt", "w") as file:
    file.write("Comparative Analysis Report\n")
    file.write("----------------------------\n")
    file.write(f"Overall Sentiment - Beats Solo 3: {overall_sentiment_product1}\n")
    file.write(f"Overall Sentiment - Sony: {overall_sentiment_product2}\n")
    file.write(f"Based on the analysis, {better_product} is generally perceived more positively by the customers.\n")
    file.write("\nNouns with Opposite Sentiments:\n")
    for noun in opposite_sentiments_nouns:
        file.write(f"- {noun}\n")
    file.write("\nKeys with top 10 Positive values in Beats:\n")
    for key in pos_product1:
        file.write(f"- {key}\n")
    file.write("\nKeys with top 10 Negative values in Beats:\n")
    for key in neg_product1:
        file.write(f"- {key}\n")
    file.write("\nKeys with top 10 Positive values in Sony:\n")
    for key in pos_product2:
        file.write(f"- {key}\n")
    file.write("\nKeys with top 10 Negative values in Sony:\n")
    for key in neg_product2:
        file.write(f"- {key}\n")
    
    
print("Report generated!")

Report generated!
