In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from pyspark.sql.types import FloatType

# Start a Spark session for this job, or reuse one that is already running
spark = (
    SparkSession.builder
    .appName("SentimentAnalysis")
    .getOrCreate()
)

# Sample sentences held in a single-column DataFrame named "text"
# (the same shape of data could be loaded from a CSV instead)
data = spark.createDataFrame(
    [
        ("I love this product! It's amazing.",),
        ("This is the worst experience I've ever had.",),
        ("It's okay, not the best but not the worst.",),
        ("Absolutely terrible! I hated every second.",),
        ("I am so excited and happy about this event.",),
        ("The food was decent, nothing special.",),
    ],
    ["text"],
)

# Build the VADER analyzer once; the scoring function below looks it up
# as a module-level name
analyzer = SentimentIntensityAnalyzer()

# Define a UDF to compute sentiment using VADER
def analyze_sentiment(text):
    """Return the VADER compound sentiment score for ``text``.

    Args:
        text: The string to score, or ``None``. Spark hands ``None`` to a
            Python UDF when the input column value is null.

    Returns:
        The ``'compound'`` entry of ``analyzer.polarity_scores(text)`` as a
        plain Python float, or ``None`` when ``text`` is ``None`` so that a
        null input row produces a null score.
    """
    # Guard against null rows: passing None on to the analyzer would raise
    # inside the UDF rather than yield a null score.
    if text is None:
        return None

    # Coerce to a built-in float so the value matches the FloatType return
    # type the UDF is registered with.
    return float(analyzer.polarity_scores(text)['compound'])

# Wrap the scoring function as a Spark UDF whose result column is a float
sentiment_udf = udf(analyze_sentiment, returnType=FloatType())

# Score the "text" column and attach the result as "sentiment_score"
text_column = data["text"]
data_with_sentiment = data.withColumn("sentiment_score", sentiment_udf(text_column))

# Display the scored DataFrame without truncating cell contents
data_with_sentiment.show(truncate=False)
