<a href="https://colab.research.google.com/github/MrAdithya21/Real-Time-Reddit-Sentiment-Analysis/blob/main/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Sentiment Refinement Using NLP Models (VADER & BERT)

In [None]:
import nltk
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import pipeline

# Fetch the lexicon that NLTK's VADER analyzer relies on
nltk.download("vader_lexicon")

# Rule-based scorer (VADER)
sia = SentimentIntensityAnalyzer()

# Transformer-based classifier, selected by model name
bert_sentiment = pipeline(
    task="text-classification",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

# Function to analyze sentiment
def _stars_to_sentiment(star_label):
    """Map a star-rating label such as ``"4 stars"`` to a sentiment name.

    The leading integer of the label is read as the rating: 4-5 is
    "positive", 1-2 is "negative" and 3 is "neutral". A label that does not
    start with an integer is reported as "neutral".
    """
    try:
        stars = int(star_label.split()[0])
    except (AttributeError, IndexError, ValueError):
        return "neutral"
    if stars >= 4:
        return "positive"
    if stars <= 2:
        return "negative"
    return "neutral"


def analyze_sentiment(text):
    """Classify *text* with both VADER and the BERT star-rating model.

    Args:
        text: The text to classify. Anything that is not a non-blank string
            (e.g. ``None`` or a NaN cell from a DataFrame) is treated as
            carrying no sentiment.

    Returns:
        A ``(vader_label, bert_label)`` tuple; each label is one of
        "positive", "negative" or "neutral".
    """
    # Missing or blank cells cannot be scored by either model.
    if not isinstance(text, str) or not text.strip():
        return "neutral", "neutral"

    vader_score = sia.polarity_scores(text)["compound"]
    if vader_score > 0.05:
        vader_label = "positive"
    elif vader_score < -0.05:
        vader_label = "negative"
    else:
        vader_label = "neutral"

    # truncation=True keeps long posts within the model's maximum input
    # length instead of failing inside the pipeline.
    bert_result = bert_sentiment(text, truncation=True)[0]["label"]
    bert_label = _stars_to_sentiment(bert_result)

    return vader_label, bert_label

# Sample DataFrame (Replace this with your actual DataFrame)
# df = pd.read_csv("your_data.csv")  # Uncomment this if you have a CSV file

# Apply sentiment analysis. The label pairs are collected in a list (rather
# than unpacked from zip(*...)) so a DataFrame with no rows still works.
sentiment_pairs = [analyze_sentiment(text) for text in df["text"]]
df["VADER_sentiment"] = [vader for vader, _ in sentiment_pairs]
df["BERT_sentiment"] = [bert for _, bert in sentiment_pairs]

# Display refined sentiment analysis results
import matplotlib.pyplot as plt

# Fixed label order with one colour per label, so a given sentiment is always
# drawn in the same colour regardless of which label is most frequent.
sentiment_colors = {"negative": "red", "neutral": "blue", "positive": "green"}

# Count sentiment occurrences (labels that never occur are shown as 0)
sentiment_counts = (
    df["BERT_sentiment"]
    .value_counts()
    .reindex(list(sentiment_colors), fill_value=0)
)

# Plot Sentiment Distribution
plt.figure(figsize=(8, 5))
sentiment_counts.plot(kind="bar", color=list(sentiment_colors.values()))
plt.xlabel("Sentiment")
plt.ylabel("Count")
# Only the BERT labels are plotted, so the title names BERT alone.
plt.title("Sentiment Distribution (BERT)")
plt.show()

# Display DataFrame
print(df.head())


## Named Entity Recognition (NER) for Trending Topics

In [None]:
pip install spacy

In [None]:
import spacy
from collections import Counter

# Load spaCy English model; `nlp` is the pipeline object that
# extract_named_entities calls on each text.
# NOTE(review): assumes the "en_core_web_sm" model package is already
# installed in this environment — confirm, since the install cell above only
# installs spaCy itself.
nlp = spacy.load("en_core_web_sm")

# Function to extract entities
# Entity labels kept as "topics": ORG = organizations, GPE = geopolitical
# entities (countries, cities, states), PERSON = people.
_TOPIC_ENTITY_LABELS = frozenset({"ORG", "GPE", "PERSON"})


def extract_named_entities(text):
    """Return the ORG, GPE and PERSON entity strings found in *text*.

    Args:
        text: The text to scan. Anything that is not a non-blank string
            (e.g. ``None`` or a NaN cell from a DataFrame) yields no entities.

    Returns:
        A list of entity texts in document order; duplicates are kept so the
        caller can count occurrences.
    """
    # Missing or blank cells have nothing to parse, so skip the pipeline call.
    if not isinstance(text, str) or not text.strip():
        return []
    doc = nlp(text)
    return [ent.text for ent in doc.ents if ent.label_ in _TOPIC_ENTITY_LABELS]

# Run entity extraction on every row's text
df["entities"] = df["text"].apply(extract_named_entities)

# Flatten the per-row entity lists into a single list of mentions
entity_list = []
for row_entities in df["entities"]:
    entity_list.extend(row_entities)

# The ten most frequently mentioned entities, most common first
top_entities = Counter(entity_list).most_common(10)

print("Top 10 Trending Topics:", top_entities)