In [None]:
!pip install scikit-learn

In [7]:
import pyodbc
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from datetime import datetime
from textblob import TextBlob
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# ---- CONNECT TO SQL SERVER ----
# Connection string uses Windows integrated auth (Trusted_Connection), so no
# credentials are embedded here.
connection = (
    "DRIVER={SQL Server};"
    "SERVER=TPBLUE-IST;"
    "DATABASE=CustomerFeedbackDB;"
    "Trusted_Connection=yes;"
)

# SQL query to fetch feedback text and recipe name
query = """
    SELECT [recipe_name], [feedback_text]
    FROM [CustomerFeedbackDB].[dbo].[ft_user_feedback]
"""

# Establish the database connection
dbcon = pyodbc.connect(connection)
try:
    # Create a cursor, run the query and pull every row into memory.
    # Each row is indexable: [0] = recipe_name, [1] = feedback_text.
    cursor = dbcon.cursor()
    cursor.execute(query)
    feedbacks = cursor.fetchall()
finally:
    # Always release the connection, even when the query or fetch raises;
    # otherwise a failed cell leaves the connection open for the kernel's lifetime.
    dbcon.close()

# Debugging: Print number of feedback entries fetched
print(f"Number of feedback entries fetched: {len(feedbacks)}")


In [None]:
# Score every fetched feedback row with TextBlob and bucket the polarity into
# 'Positive' (> 0), 'Negative' (< 0) or 'Neutral' (== 0).
# Reads `feedbacks` (rows of [recipe_name, feedback_text]); produces `df` with
# columns recipe_name, feedback_text, sentiment.

# List to store sentiment results
data = []

# Perform sentiment analysis on each feedback
for feedback in feedbacks:
    # A NULL column comes back as None, which has no .strip(); coalesce it to ''
    # so the row is handled exactly like an empty string instead of crashing.
    recipe_name = (feedback[0] or '').strip()
    feedback_text = (feedback[1] or '').strip()

    # Get sentiment polarity
    sentiment = TextBlob(feedback_text).sentiment.polarity

    if sentiment > 0:
        sentiment_category = "Positive"
    elif sentiment < 0:
        sentiment_category = "Negative"
    else:
        sentiment_category = "Neutral"

    # Append the result as a dictionary
    data.append({
        'recipe_name': recipe_name,
        'feedback_text': feedback_text,
        'sentiment': sentiment_category
    })

# Convert the list of dictionaries to a pandas DataFrame.
# Columns are named explicitly so that an empty result set still yields the
# expected schema (a column-less frame would break the later merge on
# recipe_name / feedback_text).
df = pd.DataFrame(data, columns=['recipe_name', 'feedback_text', 'sentiment'])

# Debugging: Print number of entries in the DataFrame
print(f"Number of sentiment entries: {len(df)}")


In [None]:
# Placeholder ground-truth table: one hand-labelled sentiment per
# (recipe_name, feedback_text) pair. Replace the sample rows with real labels.
ground_truth = pd.DataFrame(dict(
    recipe_name=['Recipe 1', 'Recipe 2', 'Recipe 3'],
    feedback_text=['Feedback text 1', 'Feedback text 2', 'Feedback text 3'],
    sentiment=['Positive', 'Negative', 'Neutral'],
))

# Debugging: show the table as entered (before whitespace normalisation)
print("Ground Truth DataFrame:")
print(ground_truth)

# Strip surrounding whitespace from both key columns so they compare equal to
# the (already stripped) values on the prediction side.
ground_truth[['recipe_name', 'feedback_text']] = (
    ground_truth[['recipe_name', 'feedback_text']].apply(lambda col: col.str.strip())
)


In [None]:
# Merge the predicted sentiments with the ground truth for evaluation.
# Rows are matched on (recipe_name, feedback_text); the two `sentiment`
# columns become `sentiment_pred` (from df) and `sentiment_true` (from ground_truth).
merged_df = pd.merge(df, ground_truth, on=['recipe_name', 'feedback_text'], suffixes=('_pred', '_true'))

# Debugging: Print the merged DataFrame
print("Merged DataFrame:")
print(merged_df)

# Check if the merged DataFrame is empty
if merged_df.empty:
    print("No matching records found between predicted sentiments and ground truth.")
else:
    # Check unique values in true and predicted sentiments
    print("Unique true sentiments:", merged_df['sentiment_true'].unique())
    print("Unique predicted sentiments:", merged_df['sentiment_pred'].unique())

    # Calculate and print the performance metrics
    accuracy = accuracy_score(merged_df['sentiment_true'], merged_df['sentiment_pred'])
    print(f'Accuracy: {accuracy:.2f}')

    # Print the classification report with zero_division handling.
    # `labels=` pins both the row order and the row names. Passing only
    # `target_names` is positional against the *sorted* classes
    # (Negative, Neutral, Positive), which mislabels the rows, and it fails
    # outright when one of the three classes is absent from the merged data.
    print("Classification Report:")
    print(classification_report(
        merged_df['sentiment_true'],
        merged_df['sentiment_pred'],
        labels=['Positive', 'Negative', 'Neutral'],
        zero_division=0  # Report 0 instead of warning for classes with no samples
    ))


In [None]:
# Evaluate predictions against ground truth, then summarise, plot and export.
# Reads `merged_df` (columns recipe_name, feedback_text, sentiment_pred,
# sentiment_true) produced by the merge cell above.

# Fixed display order shared by the classification report and the bar chart
sentiment_order = ['Positive', 'Negative', 'Neutral']

# Debugging: Print the merged DataFrame
print("Merged DataFrame:")
print(merged_df)

# Check if the merged DataFrame is empty
if merged_df.empty:
    print("No matching records found between predicted sentiments and ground truth. The merged DataFrame is empty.")
else:
    # Check unique values in true and predicted sentiments
    print("Unique true sentiments:", merged_df['sentiment_true'].unique())
    print("Unique predicted sentiments:", merged_df['sentiment_pred'].unique())

    # Calculate and print the performance metrics
    accuracy = accuracy_score(merged_df['sentiment_true'], merged_df['sentiment_pred'])
    print(f'Accuracy: {accuracy:.2f}')

    # Print the classification report with zero_division handling.
    # `labels=` pins both the row order and the row names. Passing only
    # `target_names` is positional against the *sorted* classes
    # (Negative, Neutral, Positive), which mislabels the rows, and it fails
    # outright when one of the three classes is absent from the merged data.
    print("Classification Report:")
    print(classification_report(
        merged_df['sentiment_true'],
        merged_df['sentiment_pred'],
        labels=sentiment_order,
        zero_division=0  # Report 0 instead of warning for classes with no samples
    ))

    # ---- SUMMARY ----
    # Create a summary dataframe showing feedback count and sentiment distribution by recipe
    summary_df = merged_df.pivot_table(
        index='recipe_name',
        columns='sentiment_true',  # Using actual sentiments for summary
        aggfunc='size',
        fill_value=0
    ).reset_index()

    # Print the summary DataFrame
    if summary_df.empty:
        print("Summary DataFrame is empty. No data available for sentiment distribution by recipe.")
    else:
        print("Summary of Sentiment Distribution by Recipe:")
        print(summary_df)

        # ---- VISUALIZATION ----
        # Bar plot of the true-sentiment counts. Built with pandas + matplotlib
        # because seaborn (`sns`) is never imported in this notebook, so the
        # previous sns.countplot call raised NameError.
        sentiment_counts = (
            merged_df['sentiment_true']
            .value_counts()
            .reindex(sentiment_order, fill_value=0)  # fixed order; absent classes plot as 0
        )
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.bar(sentiment_counts.index, sentiment_counts.values)
        ax.set_title('Sentiment Distribution')
        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()

        # Save the plot
        fig.savefig('sentiment_distribution.png')
        plt.show()

        # Save merged DataFrame to an Excel file
        output_file = 'merged_sentiment_analysis.xlsx'
        merged_df.to_excel(output_file, index=False)
        print(f'Results saved to {output_file}')
