# Sentiment Analysis 

## Importing libraries 

In [23]:
import pandas as pd
import pyodbc
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer 


In [24]:
%pip install SQLAlchemy
from sqlalchemy import create_engine

Note: you may need to restart the kernel to use updated packages.


In [25]:
# Download the VADER lexicon for sentiment analysis
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\alsan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

## Define a functions 

In [26]:
# Functoon to fetch data from a SQL database using a SQL query
def fetch_data_from_sql():
    # Define the connection string with parameters for the SQL Server database connection
    conn_str = ( 
        "Driver={SQL Server};"
        "Server=shahad\SQLEXPRESS;"
        "Database=MarketingAnalytics;"
        "Trusted_Connection=yes;"
    )
     # Establish the connection to the database
    conn = pyodbc.connect(conn_str)
    # Define the SQL query to fetch customer reviews data
    query = "SELECT ReviewID, CustomerID, ProductID, ReviewDate, Rating, ReviewText FROM fact_customer_reviews"
    # Execute the query and fetch the data into a DataFrame
    df = pd.read_sql(query, conn)
    # Close the connection to free up resources
    conn.close()
    # Return the fetched data as a DataFrame
    return df

In [27]:
# Fetch the customer reviews data from the SQL database
customer_reviews = fetch_data_from_sql()

  df = pd.read_sql(query, conn)


In [28]:
# Initialize the VADER sentiment intensity analyzer for analyzing the sentiment of text data
sia = SentimentIntensityAnalyzer()


In [29]:
# Function to calculate sentiment scores using VADER
def calculate_sentiment(review):
    sentiment = sia.polarity_scores(review)
    # Return the compound score, which is a normalized score between -1 (most negative) and 1 (most positive)
    return sentiment['compound']

In [30]:
# Function to categorize sentiment using both the sentiment score and the review rating
def categorize_sentiment(score, rating):
    # Use both the text sentiment score and the numerical rating to determine sentiment category
    if score > 0.05:
        if rating >= 4:
            return 'Positive'
        elif rating == 3:
            return 'Mixed Positive'
        else:
            return 'Mixed Negative'
    elif score < -0.05:
        if rating <= 2:
            return 'Negative'
        elif rating == 3:
            return 'Mixed Negative'
        else:
            return 'Mixed Positive'
    else:
        if rating >= 4:
            return 'Positive'
        elif rating <= 2:
            return 'Negative'
        else:
            return 'Neutral'
    

In [31]:
# Function to bucket sentiment scores into text range
def sentiment_bucket(score):
    if score >= 0.05:
        return '0.5 to 1.0'
    elif 0.0 <= score < 0.5:
        return '0.0 to 0.49'
    elif -0.5 <= score < 0.0:
        return '-0.49 to 0.0'
    else:
        return '-1.0 to -0.5'

In [32]:
# Apply sentiment analysis to calculate sentiment scores for each review
customer_reviews['SentimentScore'] = customer_reviews['ReviewText'].apply(calculate_sentiment)


In [33]:
# Apply sentiment categorization using both text and rating
customer_reviews['SentimentCategory'] = customer_reviews.apply(lambda row: categorize_sentiment(row['SentimentScore'], row['Rating']), axis=1)

In [34]:
# Apply sentiment bucketing to categorize scores into defined ranges
customer_reviews['SentimentBucket'] = customer_reviews['SentimentScore'].apply(sentiment_bucket)

In [37]:
# Display the first few rows of the DataFrame with sentiment scores, categories, and buckets
print(customer_reviews.head())

   ReviewID  CustomerID  ProductID  ReviewDate  Rating  \
0         1          77         18  2023-12-23       3   
1         2          80         19  2024-12-25       5   
2         3          50         13  2025-01-26       4   
3         4          78         15  2025-04-21       3   
4         5          64          2  2023-07-16       3   

                             ReviewText  SentimentScore SentimentCategory  \
0  Average experience, nothing special.         -0.3089    Mixed Negative   
1            The quality is  top-notch.          0.0000          Positive   
2    Five stars for the quick delivery.          0.0000          Positive   
3   Good quality, but could be cheaper.          0.2382    Mixed Positive   
4  Average experience, nothing special.         -0.3089    Mixed Negative   

  SentimentBucket  
0    -0.49 to 0.0  
1     0.0 to 0.49  
2     0.0 to 0.49  
3      0.5 to 1.0  
4    -0.49 to 0.0  


In [None]:
# Save the DataFrame with sentiment scores, categories, and buckets to a new CSV file
customer_reviews.to_csv('fact_customer_reviews_with_sentiment.csv', index = False)