In [14]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [15]:
# Load the data
df = pd.read_csv('master_review.csv')

In [16]:
# Convert datatypes to best possiblle
df = df.convert_dtypes()
# Convert date to datetime
df['Date'] = pd.to_datetime(df['Date'])
# Verifying the data types
df.dtypes

Market_place           string[python]
URL                    string[python]
Product_name           string[python]
SKU                    string[python]
Price                           Int64
Currency               string[python]
Total_stars_ranking           Float64
Subject                string[python]
Author                 string[python]
Date                   datetime64[ns]
Review                 string[python]
Stars                           Int64
like                            Int64
dislike                         Int64
dtype: object

In [17]:
# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("LiYuan/amazon-review-sentiment-analysis")
model = AutoModelForSequenceClassification.from_pretrained("LiYuan/amazon-review-sentiment-analysis")

In [18]:
def get_sentiment(text):
    # Truncate the text to a maximum length of 512 tokens
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model(**inputs)
    sentiment = outputs.logits.argmax(dim=1).item()
    return 'POSITIVE' if sentiment == 1 else 'NEGATIVE'

In [19]:
# Create a new column for the sentiment of each review
df['sentiment'] = df['Review'].apply(get_sentiment)

In [20]:
# Split the data into training and testing sets
train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)

In [21]:
# Analyze positive aspects in negative reviews
negative_reviews = train_df[train_df['sentiment'] == 'NEGATIVE']
positive_aspects = negative_reviews[negative_reviews['Stars'] > 3]['Review']

In [22]:
# Analyze negative aspects in positive reviews
positive_reviews = train_df[train_df['sentiment'] == 'POSITIVE']
negative_aspects = positive_reviews[positive_reviews['Stars'] < 3]['Review']

In [23]:
# View the results
print('Negative Aspects in Positive Reviews:')
print(positive_aspects)


Negative Aspects in Positive Reviews:
307                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

In [24]:
print('Postivie Aspects in Negative Reviews:')
print(negative_aspects)

Postivie Aspects in Negative Reviews:
276                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  The size and high of the toilet is really good. The instructions for installation were not clear, but even then I was happy with the purchase. The problem comes around two m

In [25]:
# Save the positive and negative aspects to CSV files
positive_aspects.to_csv('positive_aspects.csv', index=False)
negative_aspects.to_csv('negative_aspects.csv', index=False)