# In [None]:
import pandas as pd
import numpy as np
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# Initialize the VADER sentiment analyzer once at module level; this single
# instance is reused by vader_sentiment_score() on every call.
analyzer = SentimentIntensityAnalyzer()

# Function to compute VADER sentiment score
def vader_sentiment_score(text):
    """Return VADER's compound sentiment score for ``text``.

    The text is scored with the module-level ``analyzer``. Of the scores it
    reports, only the ``'compound'`` entry is returned; VADER documents it as
    the overall sentiment normalised to the range [-1, 1].
    """
    return analyzer.polarity_scores(text)['compound']

# Function to clean text and remove special characters
def remove_special_characters(text):
    """Return ``text`` with every disallowed character deleted.

    Only ASCII letters, ASCII digits and whitespace survive. Punctuation,
    underscores, symbols and non-ASCII letters are removed outright (they are
    not replaced by a space), so neighbouring characters are joined together.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\s]')
    cleaned = disallowed.sub('', text)
    return cleaned

# Updated function to predict sentiment based on stars and comments
def predict_sentiment_combined(stars, comment):
    """Classify a review from its star rating and the sentiment of its comment.

    The comment is stripped of special characters, scored with VADER, and the
    resulting compound score is combined with the star rating:

    * ``"positive"`` -- score >= 0.5 and a 4- or 5-star rating;
    * ``"negative"`` -- score <= -0.5 (whatever the rating), or a 0- or
      1-star rating;
    * ``"neutral"``  -- every other combination.

    Args:
        stars: Star rating given by the user. Only the values 0, 1, 4 and 5
            influence the decision.
        comment: Free-text comment. ``None`` or a float NaN is treated as an
            empty comment instead of raising ``TypeError``.

    Returns:
        dict: ``"star_rating"`` (the ``stars`` argument unchanged),
        ``"comment_sentiment_score"`` (the VADER compound score of the cleaned
        comment) and ``"final_classification"`` (one of ``"positive"``,
        ``"negative"`` or ``"neutral"``).
    """
    # A missing comment (None, or a float NaN such as a missing cell read by
    # pandas) would make re.sub() raise TypeError; treat it as an empty
    # comment instead. NaN is the only float that is not equal to itself.
    if comment is None or (isinstance(comment, float) and comment != comment):
        comment = ""

    # Clean the comment, then score it.
    # NOTE(review): VADER's documentation says punctuation and emoticons
    # influence its score, and cleaning first discards them -- confirm this
    # matches how any downstream model's training data was scored.
    comment_cleaned = remove_special_characters(comment)
    vader_score = vader_sentiment_score(comment_cleaned)

    high_stars = stars in (4, 5)
    low_stars = stars in (0, 1)

    # Decision mechanism. A strongly negative comment wins over a high star
    # rating; a strongly positive comment without a 4/5-star rating, and any
    # score strictly between the thresholds, fall through to "neutral".
    if vader_score >= 0.5 and high_stars:
        classification = "positive"
    elif vader_score <= -0.5 or low_stars:
        classification = "negative"
    else:
        classification = "neutral"

    # Return detailed results
    return {
        "star_rating": stars,
        "comment_sentiment_score": vader_score,
        "final_classification": classification
    }

# Sample review used to demonstrate the prediction
user_stars, user_comment = 5, "I hate this app"

# Classify the sample review
result = predict_sentiment_combined(stars=user_stars, comment=user_comment)

# Report each field of the result on its own line
print(
    f"Star Rating: {result['star_rating']}",
    f"Comment Sentiment Score: {result['comment_sentiment_score']}",
    f"Final Classification: {result['final_classification']}",
    sep="\n",
)

# Optional: Example integration with TF-IDF for added context
# Assumes a TF-IDF vectorizer has been fitted on a dataset and saved as a .pkl file.
# NOTE: joblib.load() unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
comment_vectorized = None  # stays None when the vectorizer cannot be loaded
try:
    # Load saved TF-IDF vectorizer
    tfidf = joblib.load('tfidf_vectorizer.pkl')
except FileNotFoundError:
    print("TF-IDF vectorizer not found. Ensure it has been trained and saved.")
else:
    # Clean the comment with the same helper predict_sentiment_combined()
    # uses before vectorizing it.
    comment_vectorized = tfidf.transform([remove_special_characters(user_comment)])

    print("TF-IDF vectorization complete.")
    print("Vectorized comment shape:", comment_vectorized.shape)

# Integration with a chosen model for additional predictions (optional)
# Assumes a saved model (e.g., Random Forest or SVM) is available
try:
    model = joblib.load('hope_model.pkl')  # Replace with the correct model filename
except FileNotFoundError:
    print("Trained model not found. Ensure it has been trained and saved.")
else:
    if comment_vectorized is None:
        # Without the vectorizer there are no text features to feed the
        # model; report that instead of failing on an undefined name.
        print("Skipping model prediction: TF-IDF features are unavailable.")
    else:
        # Feature row layout: [TF-IDF terms..., star rating, VADER compound score].
        # NOTE(review): presumably this must match the column layout the
        # model was trained on -- confirm against the training code.
        input_features = np.hstack((
            comment_vectorized.toarray(),
            np.array([[user_stars]]),
            np.array([[result['comment_sentiment_score']]]),
        ))
        predicted_class = model.predict(input_features)

        print(f"Model Predicted Class: {predicted_class[0]}")

# Compact one-line summary of the result (for a potential user interface),
# printed under its heading
print("\n".join((
    "\nUser-friendly result:",
    f"Stars: {result['star_rating']} | Sentiment Score: {result['comment_sentiment_score']:.2f} | Classification: {result['final_classification']}",
)))
