In [1]:
import pandas as pd
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

# Download VADER for sentiment analysis
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Load datasets
twitter_data = pd.read_csv("Twitter_Data.csv")
user_reviews = pd.read_csv("user_reviews.csv")

# Function for VADER Sentiment Analysis
def vader_sentiment(text):
    if isinstance(text, str):
        return sia.polarity_scores(text)['compound']
    return 0

# Function for TextBlob Sentiment Analysis
def textblob_sentiment(text):
    if isinstance(text, str):
        return TextBlob(text).sentiment.polarity
    return 0

# Apply sentiment analysis
if 'text' in twitter_data.columns:
    twitter_data['sentiment_score'] = twitter_data['text'].apply(vader_sentiment)
    twitter_data['sentiment_label'] = twitter_data['sentiment_score'].apply(lambda x: 'Positive' if x > 0.05 else ('Negative' if x < -0.05 else 'Neutral'))
    twitter_data.to_csv("/mnt/data/Twitter_Data_Sentiment.csv", index=False)
    
    print("Twitter Data Columns:", twitter_data.columns)
    print("Unique Sentiment Labels:", twitter_data['sentiment_label'].unique())
    print("Null Values:", twitter_data['sentiment_label'].isnull().sum())
    
    # Visualization
    plt.figure(figsize=(6, 4))
    sns.countplot(x='sentiment_label', data=twitter_data, palette='coolwarm')
    plt.title("Sentiment Distribution in Twitter Data")
    plt.xlabel("Sentiment")
    plt.ylabel("Count")
    plt.show()

if 'review' in user_reviews.columns:
    user_reviews['sentiment_score'] = user_reviews['review'].apply(textblob_sentiment)
    user_reviews['sentiment_label'] = user_reviews['sentiment_score'].apply(lambda x: 'Positive' if x > 0 else ('Negative' if x < 0 else 'Neutral'))
    user_reviews.to_csv("/mnt/data/User_Reviews_Sentiment.csv", index=False)
    
    print("User Reviews Columns:", user_reviews.columns)
    print("Unique Sentiment Labels:", user_reviews['sentiment_label'].unique())
    print("Null Values:", user_reviews['sentiment_label'].isnull().sum())
    
    # Visualization
    plt.figure(figsize=(6, 4))
    sns.countplot(x='sentiment_label', data=user_reviews, palette='coolwarm')
    plt.title("Sentiment Distribution in User Reviews")
    plt.xlabel("Sentiment")
    plt.ylabel("Count")
    plt.show()

print("Sentiment analysis completed. Output saved and visualized.")


[nltk_data] Downloading package vader_lexicon to C:\Users\Bala
[nltk_data]     Krishnan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Sentiment analysis completed. Output saved and visualized.
