In [1]:
import pandas as pd
import pyodbc
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
def fetch_data_from_sql(server, database, query):
    """Run ``query`` against a SQL Server database and return a DataFrame.

    Connects through pyodbc using the "ODBC Driver 17 for SQL Server" driver
    with ``Trusted_Connection=yes`` (no username/password in the string).

    Args:
        server: Value placed in the ``SERVER=`` field of the connection string.
        database: Value placed in the ``DATABASE=`` field.
        query: SQL text handed to ``pandas.read_sql_query``.

    Returns:
        The DataFrame produced by ``pandas.read_sql_query``.

    Raises:
        Any exception raised by ``pyodbc.connect`` or
        ``pandas.read_sql_query``. If the query fails, the connection is
        still closed before the error propagates.
    """
    conn_str = (f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE={database};Trusted_Connection=yes;')
    conn = pyodbc.connect(conn_str)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # Always release the connection, even when the query raises;
        # previously a failed query left the connection open.
        conn.close()
   
# SQL for the customer review extract: renames columns to display-friendly
# headers, converts ReviewDate to a date, and replaces double spaces in the
# review text with a single space.
query = """
    SELECT
    ReviewID    as 'Review ID',
    CustomerID  as 'Customer ID',
    ProductID   as 'Product ID',
    CONVERT( date, ReviewDate ) as 'Review Date',
    Rating,
    REPLACE(ReviewText, '  ', ' ') as 'Review Text'
    FROM [MarketingData].[dbo].[customer_reviews]
    """

# Pull the reviews into a DataFrame from the MarketingData database.
reviews_df = fetch_data_from_sql(
    server='AKSHY-PC',
    database='MarketingData',
    query=query,
)

# Preview the first ten rows as the cell output.
reviews_df.head(10)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,Review ID,Customer ID,Product ID,Review Date,Rating,Review Text
0,1,77,18,2023-12-23,3,"Average experience, nothing special."
1,2,80,19,2024-12-25,5,The quality is top-notch.
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper."
4,5,64,2,2023-07-16,3,"Average experience, nothing special."
5,6,81,1,2025-12-21,4,Customer support was very helpful.
6,7,16,1,2024-01-29,3,"Average experience, nothing special."
7,8,55,8,2024-08-15,5,The quality is top-notch.
8,9,3,13,2023-09-01,4,"I love this product, will buy again!"
9,10,78,6,2024-06-17,5,"Excellent product, highly recommend!"


In [3]:
# Build the analyzer once at module level; analyze_sentiment() below reuses
# this single instance for every review instead of constructing a new one.
sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Return the 'compound' polarity score for ``text``.

    The value is coerced with ``str()`` before scoring, so non-string inputs
    are scored by their string form. Scoring is delegated to the module-level
    ``sia`` analyzer's ``polarity_scores`` method.
    """
    polarity = sia.polarity_scores(str(text))
    return polarity['compound']

# Score each review's text and store the result in a new column.
review_texts = reviews_df['Review Text']
reviews_df['Sentiment Score'] = review_texts.apply(analyze_sentiment)

# Preview the first ten rows, now including the score column.
reviews_df.head(10)


Unnamed: 0,Review ID,Customer ID,Product ID,Review Date,Rating,Review Text,Sentiment Score
0,1,77,18,2023-12-23,3,"Average experience, nothing special.",-0.3089
1,2,80,19,2024-12-25,5,The quality is top-notch.,0.0
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.,0.0
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper.",0.2382
4,5,64,2,2023-07-16,3,"Average experience, nothing special.",-0.3089
5,6,81,1,2025-12-21,4,Customer support was very helpful.,0.6997
6,7,16,1,2024-01-29,3,"Average experience, nothing special.",-0.3089
7,8,55,8,2024-08-15,5,The quality is top-notch.,0.0
8,9,3,13,2023-09-01,4,"I love this product, will buy again!",0.6696
9,10,78,6,2024-06-17,5,"Excellent product, highly recommend!",0.7773


In [4]:
def categorize_sentiment(score, rating):
    """Combine a text sentiment score with a numeric rating into one label.

    The score is bucketed first, with +/-0.05 as the edges of the neutral
    band; the rating then refines the label:

        score >= 0.05   : rating >= 4 -> 'Positive'
                          rating == 3 -> 'Mixed Positive'
                          otherwise   -> 'Mixed Negative'
        score <= -0.05  : rating <= 2 -> 'Negative'
                          rating == 3 -> 'Mixed Negative'
                          otherwise   -> 'Mixed Positive'
        in between      : rating >= 4 -> 'Mixed Positive'
                          rating == 3 -> 'Neutral'
                          otherwise   -> 'Mixed Negative'

    Args:
        score: Sentiment score of the review text (the notebook passes the
            'Sentiment Score' column).
        rating: Numeric rating of the review (the notebook passes the
            'Rating' column).

    Returns:
        One of 'Positive', 'Mixed Positive', 'Neutral', 'Mixed Negative',
        'Negative'.
    """
    if score >= 0.05:
        # Positive text.
        if rating >= 4:
            return 'Positive'
        if rating == 3:
            return 'Mixed Positive'
        # Was 'Mixed negative' (lowercase n), which produced a separate,
        # unintended category from the 'Mixed Negative' used elsewhere.
        return 'Mixed Negative'

    if score <= -0.05:
        # Negative text.
        if rating <= 2:
            return 'Negative'
        if rating == 3:
            return 'Mixed Negative'
        return 'Mixed Positive'

    # Neutral text: the rating alone decides the direction.
    if rating >= 4:
        return 'Mixed Positive'
    if rating == 3:
        return 'Neutral'
    return 'Mixed Negative'
       
# Label every review row-by-row from its sentiment score and rating.
reviews_df['Sentiment Category'] = reviews_df.apply(
    lambda review: categorize_sentiment(review['Sentiment Score'], review['Rating']),
    axis=1,
)

# Preview the first ten rows, now including the category column.
reviews_df.head(10)

Unnamed: 0,Review ID,Customer ID,Product ID,Review Date,Rating,Review Text,Sentiment Score,Sentiment Category
0,1,77,18,2023-12-23,3,"Average experience, nothing special.",-0.3089,Mixed Negative
1,2,80,19,2024-12-25,5,The quality is top-notch.,0.0,Mixed Positive
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.,0.0,Mixed Positive
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper.",0.2382,Mixed Positive
4,5,64,2,2023-07-16,3,"Average experience, nothing special.",-0.3089,Mixed Negative
5,6,81,1,2025-12-21,4,Customer support was very helpful.,0.6997,Positive
6,7,16,1,2024-01-29,3,"Average experience, nothing special.",-0.3089,Mixed Negative
7,8,55,8,2024-08-15,5,The quality is top-notch.,0.0,Mixed Positive
8,9,3,13,2023-09-01,4,"I love this product, will buy again!",0.6696,Positive
9,10,78,6,2024-06-17,5,"Excellent product, highly recommend!",0.7773,Positive
