# Import and initialize

In [1]:
# Import pandas
import pandas as pd

In [2]:
# Import csv file (source)
df_customer_review = pd.read_csv("customer_reviews.csv")

In [3]:
# Display the first 3 rows of the DataFrame to check the data
df_customer_review.head(3)

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText
0,1,77,18,12/23/2022,3,"Average experience, nothing special."
1,2,80,19,12/25/2024,5,The quality is top-notch.
2,3,50,13,1/26/2023,4,Five stars for the quick delivery.


In [4]:
# Get the number of rows and columns in the DataFrame
df_customer_review.shape

(1363, 6)

In [5]:
# Check the data types and non-null counts for each column
df_customer_review.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1363 entries, 0 to 1362
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ReviewID    1363 non-null   int64 
 1   CustomerID  1363 non-null   int64 
 2   ProductID   1363 non-null   int64 
 3   ReviewDate  1363 non-null   object
 4   Rating      1363 non-null   int64 
 5   ReviewText  1363 non-null   object
dtypes: int64(4), object(2)
memory usage: 64.0+ KB


In [None]:
# Install the Natural Language Toolkit (nltk) through pip
!pip install nltk

In [7]:
# Import nltk library and SentimentIntensityAnalyzer for sentiment analysis
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# Download the VADER lexicon for sentiment analysis
nltk.download('vader_lexicon')

In [9]:
# Initialize the Sentiment Intensity Analyzer for sentiment analysis
sia = SentimentIntensityAnalyzer()

# Try the analyzer:
Check all the results to see how the sentiment analyzer works.

In [10]:
# Sample texts to evaluate using the sentiment analysis tool
text1 = "I am very happy, your service is brilliant!"
text2 = "Five stars for the quick delivery."
text3 = "I should say I am really happy with this service, but it's brilliantly FUCKED!"
text4 = "The quality is top-notch."

In [11]:
# Run the sentiment analyzer over the four sample texts in order and keep
# each result dictionary under its own name
sentiment_scores1, sentiment_scores2, sentiment_scores3, sentiment_scores4 = [
    sia.polarity_scores(sample) for sample in (text1, text2, text3, text4)
]

In [12]:
# Print the sentiment analysis results for text1
print(sentiment_scores1)

{'neg': 0.0, 'neu': 0.382, 'pos': 0.618, 'compound': 0.8436}


In [13]:
# Print the sentiment analysis results for text2
print(sentiment_scores2)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


In [14]:
# Print the sentiment analysis results for text3
print(sentiment_scores3)

{'neg': 0.294, 'neu': 0.368, 'pos': 0.338, 'compound': -0.1268}


In [15]:
# Print the sentiment analysis results for text4
print(sentiment_scores4)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


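Note: The analyzer is not handling these reviews properly: text2 ("Five stars for the quick delivery.") and text4 ("The quality is top-notch.") are clearly positive, yet both are scored as completely neutral (compound 0.0). We need to add the missing words to the analyzer's lexicon and preprocess the text (replace hyphens with spaces) to fix this.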

# Update the analyzer:

In [16]:
# Custom sentiment scores (term -> score) for specific words/phrases,
# merged into the analyzer's VADER lexicon in one step
# NOTE(review): 'top notch' contains a space; presumably the analyzer looks up
# one token at a time, so this key may never be matched — confirm
custom_words = {
    'notch': 1.5,
    'top notch': 2.0,
    'top-notch': 2.0,
    'star': 2.0,
    'stars': 2.5,
}
sia.lexicon.update(custom_words)

In [17]:
# Uncomment to print the updated VADER lexicon with custom sentiment scores
#print(sia.lexicon)

In [18]:
# Define additional sample texts for sentiment analysis
text5 = "Five stars for the quick delivery."
text6 = "The quality is top-notch."

In [19]:
# Replace hyphens with spaces in text6 to ensure proper sentiment analysis
text6 = text6.replace("-", " ")

In [20]:
# Show updated text6 after replacing hyphens with spaces for better analysis
text6

'The quality is top notch.'

In [21]:
# Analyze sentiment of text5 and text6 using the sentiment analyzer
sentiment_scores5 = sia.polarity_scores(text5)
sentiment_scores6 = sia.polarity_scores(text6)

In [22]:
# Print the sentiment analysis results for text5
print(sentiment_scores5)

{'neg': 0.0, 'neu': 0.588, 'pos': 0.412, 'compound': 0.5423}


In [23]:
# Print the sentiment analysis results for text6
print(sentiment_scores6)

{'neg': 0.0, 'neu': 0.411, 'pos': 0.589, 'compound': 0.5106}


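Note: With the custom lexicon entries and the hyphen replacement, both sample texts now receive positive compound scores (0.5423 and 0.5106), so the analyzer is functioning as intended.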

# Define functions for sentiment analysis and related tasks

In [24]:
def calculate_sentiment_score(text):
    """
    Return the VADER compound sentiment score for the given text.

    Every hyphen in the text is first replaced by a space, then the text is
    scored with the notebook-level analyzer `sia`. Only the compound value,
    which ranges from -1 (negative) to +1 (positive), is returned.
    """
    # Split hyphenated words into separate words before scoring
    dehyphenated = " ".join(text.split("-"))
    # Score with VADER and keep only the compound entry of the result
    return sia.polarity_scores(dehyphenated)["compound"]

In [25]:
def classify_sentiment(rating, sentiment_score):
    """
    Combine a rating and a sentiment score into one sentiment label.

    The sentiment score is treated as positive above 0.05, negative below -0.05
    and neutral otherwise. Within each of these cases the rating decides which of
    'Positive', 'Slightly Positive', 'Neutral', 'Slightly Negative' or 'Negative'
    is returned.
    """
    text_is_positive = sentiment_score > 0.05
    text_is_negative = sentiment_score < -0.05

    if text_is_positive:
        # High rating agrees with the text; anything else tempers the label
        if rating >= 4:
            return 'Positive'
        return 'Slightly Positive' if rating == 3 else 'Slightly Negative'

    if text_is_negative:
        # Low rating agrees with the text; anything else tempers the label
        if rating <= 2:
            return 'Negative'
        return 'Slightly Negative' if rating == 3 else 'Slightly Positive'

    # Neutral text: the rating alone decides
    if rating >= 4:
        return 'Positive'
    if rating <= 2:
        return 'Negative'
    return 'Neutral'

In [26]:
def sentiment_range(sentiment_score):
    """
    Categorize the sentiment score into one of four predefined range labels.

    Parameters:
        sentiment_score: numeric sentiment score to bucket.

    Returns:
        One of the strings '0.5 to 1.0', '0.0 to 0.49', '-0.49 to 0.0'
        or '-1.0 to -0.5'.

    Boundary handling follows the labels: exactly 0.5 belongs to '0.5 to 1.0'
    and exactly -0.5 belongs to '-1.0 to -0.5'. A score of exactly 0.0 is
    reported as '0.0 to 0.49'.
    """
    if sentiment_score >= 0.5:
        return '0.5 to 1.0'  # Very positive sentiment
    elif sentiment_score >= 0.0:
        return '0.0 to 0.49'  # Slightly positive sentiment
    elif sentiment_score > -0.5:
        # Strictly greater than -0.5, so -0.5 itself lands in the bucket whose label contains it
        return '-0.49 to 0.0'  # Slightly negative sentiment
    else:
        # Also reached when no comparison above is true (e.g. a NaN score)
        return '-1.0 to -0.5'  # Very negative sentiment

# Apply functions and export csv file

In [27]:
# Calculate the sentiment score for each review in the DataFrame and store it in a new column 'SentimentScore'
df_customer_review["SentimentScore"] = df_customer_review["ReviewText"].apply(calculate_sentiment_score)

In [28]:
# Display the first three rows of the updated DataFrame
df_customer_review.head(3)

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore
0,1,77,18,12/23/2022,3,"Average experience, nothing special.",-0.3089
1,2,80,19,12/25/2024,5,The quality is top-notch.,0.5106
2,3,50,13,1/26/2023,4,Five stars for the quick delivery.,0.5423


In [29]:
# Classify sentiment for each review based on the 'Rating' and 'SentimentScore', and store the result in a new column 'SentimentClassification'
df_customer_review["SentimentClassification"] = df_customer_review.apply(lambda review: classify_sentiment(review['Rating'], review['SentimentScore']), axis=1)

In [30]:
# Display the first five rows of the DataFrame to review the structure and contents, including the new columns 'SentimentScore' and 'SentimentClassification'
df_customer_review.head(5)

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore,SentimentClassification
0,1,77,18,12/23/2022,3,"Average experience, nothing special.",-0.3089,Slightly Negative
1,2,80,19,12/25/2024,5,The quality is top-notch.,0.5106,Positive
2,3,50,13,1/26/2023,4,Five stars for the quick delivery.,0.5423,Positive
3,4,78,15,4/21/2023,3,"Good quality, but could be cheaper.",0.2382,Slightly Positive
4,5,64,2,7/16/2022,3,"Average experience, nothing special.",-0.3089,Slightly Negative


In [31]:
# Categorize the sentiment score into predefined ranges and store the result in a new column 'SentimentRange'
df_customer_review['SentimentRange'] = df_customer_review['SentimentScore'].apply(sentiment_range)

In [32]:
# Display the DataFrame "df_customer_review"
df_customer_review.head()

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore,SentimentClassification,SentimentRange
0,1,77,18,12/23/2022,3,"Average experience, nothing special.",-0.3089,Slightly Negative,-0.49 to 0.0
1,2,80,19,12/25/2024,5,The quality is top-notch.,0.5106,Positive,0.5 to 1.0
2,3,50,13,1/26/2023,4,Five stars for the quick delivery.,0.5423,Positive,0.5 to 1.0
3,4,78,15,4/21/2023,3,"Good quality, but could be cheaper.",0.2382,Slightly Positive,0.0 to 0.49
4,5,64,2,7/16/2022,3,"Average experience, nothing special.",-0.3089,Slightly Negative,-0.49 to 0.0


In [33]:
# Save the final dataframe into a CSV file with UTF-8-sig encoding for Excel
df_customer_review.to_csv("customer_analyzed_reviews.csv", index=False, encoding="utf-8-sig")