In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Load the raw news export. The rest of this cell reads the columns
# positiveVotes, negativeVotes, title and description from it.
df = pd.read_csv("RAG_new_2024-2025.csv")

# Vote-based sentiment: the raw net vote count, plus the net count
# normalised by the total number of votes.
net_votes = df['positiveVotes'] - df['negativeVotes']
total_votes = df['positiveVotes'] + df['negativeVotes']
df['vote_sentiment'] = net_votes
# Divide by the true vote total (no epsilon padding) so the ratios are exact,
# e.g. 1 up-vote and 0 down-votes gives 1.0 rather than 0.99999. Rows whose
# vote total is zero are set to 0.0 explicitly instead of keeping the NaN/inf
# produced by dividing by zero. Rows with missing votes remain NaN.
df['vote_sentiment_norm'] = (net_votes / total_votes).mask(total_votes == 0, 0.0)

# Prepare VADER
analyzer = SentimentIntensityAnalyzer()

# Text to score: title and description joined by a single space, with
# missing values treated as empty strings.
df['news_text'] = df['title'].fillna('') + ' ' + df['description'].fillna('')

# Keep only the 'compound' entry of VADER's polarity scores for each text.
df['vader_sentiment'] = df['news_text'].apply(lambda x: analyzer.polarity_scores(x)['compound'])

# Label from the compound score: >= 0.05 is positive, <= -0.05 is negative,
# anything in between is neutral.
df['sentiment_label'] = df['vader_sentiment'].apply(
    lambda x: 'positive' if x >= 0.05 else ('negative' if x <= -0.05 else 'neutral')
)

# Save the frame, including all derived columns, without the index.
df.to_csv("RAG_new_2024-2025_with_both_sentiments.csv", index=False)

# Quick look at the derived columns for the first rows.
print(df[['news_text', 'vote_sentiment', 'vote_sentiment_norm', 'vader_sentiment', 'sentiment_label']].head())


                                           news_text  vote_sentiment  \
0  Cardano (ADA) Skyrockets in This Bullish Metri...              -3   
1  4 altcoins to buy under $1 for start of 2024 A...               0   
2  4 altcoins to buy under $1 for start of 2024 A...               0   
3  4 altcoins to buy under $1 for start of 2024 A...               0   
4  4 altcoins to buy under $1 for start of 2024 A...               0   

   vote_sentiment_norm  vader_sentiment sentiment_label  
0            -0.272727           0.7096        positive  
1             0.000000           0.0000         neutral  
2             0.000000           0.0000         neutral  
3             0.000000           0.0000         neutral  
4             0.000000           0.0000         neutral  


In [3]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Read the raw news export from disk.
df = pd.read_csv("RAG_new_2024-2025.csv")

# Net votes per article: positive votes minus negative votes.
positive_votes, negative_votes = df['positiveVotes'], df['negativeVotes']
df['vote_sentiment'] = positive_votes - negative_votes

# Score "<title> <description>" with VADER and keep the 'compound' entry of
# its polarity scores. Missing titles/descriptions count as empty strings.
analyzer = SentimentIntensityAnalyzer()
titles = df['title'].fillna('')
descriptions = df['description'].fillna('')
df['news_text'] = titles + ' ' + descriptions
df['sentiment'] = df['news_text'].apply(
    lambda text: analyzer.polarity_scores(text)['compound']
)

# Helpers that turn numeric scores into polarity labels
def vote_label(v):
    """Return the polarity label for a net vote count.

    Strictly positive values give 'positive', strictly negative values give
    'negative', and everything else (including zero) gives 'neutral'.
    """
    if v > 0:
        return 'positive'
    return 'negative' if v < 0 else 'neutral'

def sentiment_label(s):
    """Return the polarity label for a sentiment score.

    Scores of 0.05 or more give 'positive', scores of -0.05 or less give
    'negative', and anything that satisfies neither comparison gives
    'neutral'.
    """
    if s >= 0.05:
        return 'positive'
    return 'negative' if s <= -0.05 else 'neutral'

# Final polarity: the sentiment label decides, whether or not the votes agree
def final_polarity(row):
    """Return the final polarity label for one row.

    The rule is "when the vote label and the sentiment label agree, use that
    label; otherwise follow the sentiment label". Both cases produce the
    sentiment label, so the vote label can never influence the result and is
    not computed here.

    Args:
        row: Mapping-like row (for example a DataFrame row passed by
            ``df.apply(..., axis=1)``) with a 'sentiment' entry holding the
            score to classify.

    Returns:
        The label returned by ``sentiment_label`` for ``row['sentiment']``.
    """
    return sentiment_label(row['sentiment'])

# Label every row with final_polarity, then discard the scratch text column
# that was only needed for scoring.
df = (
    df
    .assign(polarity=df.apply(final_polarity, axis=1))
    .drop(columns=['news_text'])
)

# Write the labelled frame to disk without the index.
df.to_csv("RAG_new_2024-2025_with_sentiment_polarity.csv", index=False)

print("Done! Polarity follows VADER sentiment in case of disagreement. File saved as RAG_new_2024-2025_with_sentiment_polarity.csv")


Done! Polarity follows VADER sentiment in case of disagreement. File saved as RAG_new_2024-2025_with_sentiment_polarity.csv


In [4]:
import pandas as pd

# Read the labelled CSV back in
df = pd.read_csv("RAG_new_2024-2025_with_sentiment_polarity.csv")

# Show which columns it contains
print(list(df.columns))

['id', 'title', 'description', 'newsDatetime', 'url', 'positiveVotes', 'negativeVotes', 'sourceUrl', 'currencies', 'vote_sentiment', 'sentiment', 'polarity']
