# VADER-Based Sentiment Analysis on Tweets
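This notebook scores the sentiment of tweets with NLTK's VADER sentiment analyzer, labels each tweet as positive, neutral, or negative from its compound score, and plots the scores by label.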

In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import nltk
import matplotlib.pyplot as plt
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# Apply matplotlib's "ggplot" style to the figures drawn in this notebook.
plt.style.use("ggplot")

In [None]:
# Read the tweet CSV. The file is parsed as headerless (header=None), so the
# two columns are given explicit names here.
column_names = ["textID", "text"]
df = pd.read_csv("twitter_data_500.csv", names=column_names, header=None)

# Quick sanity check of the first rows.
print(df.head())

In [None]:
# Download the "vader_lexicon" resource through NLTK's downloader; it is
# fetched before the VADER analyzer is created later in the notebook.
nltk.download("vader_lexicon")

In [None]:
# Initialize VADER sentiment analyzer
sia = SentimentIntensityAnalyzer()

# Score every tweet and store the result keyed by its textID.
# The two columns are iterated directly instead of via df.iterrows():
# iterrows builds a Series per row (slow) and may upcast values to a common
# row dtype, and its row index was never used here.
# NOTE: rows that share a textID overwrite one another; only the last is kept.
results = {}
for myid, text in tqdm(zip(df["textID"], df["text"]), total=len(df)):
    if isinstance(text, str):
        results[myid] = sia.polarity_scores(text)
    else:
        # Non-string text (e.g. a missing value) cannot be scored, so record
        # all-zero scores under the same keys used for scored tweets.
        results[myid] = {"neg": 0, "neu": 0, "pos": 0, "compound": 0}

In [None]:
def _label_from_compound(compound):
    """Map a compound score to 'positive', 'negative' or 'neutral'."""
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'


# Turn the per-tweet score dicts into one row per textID, then attach the
# original tweet text from df.
scores = pd.DataFrame(results).T
scores = scores.rename_axis("textID").reset_index()
vader = pd.merge(scores, df, on="textID", how="left")

# Derive the sentiment label from the compound score.
vader['Label'] = vader['compound'].map(_label_from_compound)
print(vader.head())

In [None]:
# Draw one panel per VADER score column ("pos", "neu", "neg"), each showing
# that score grouped by sentiment label (seaborn's barplot aggregates with
# the mean by default).
fig, axs = plt.subplots(1, 3, figsize=(15, 4))
panels = (("pos", "Positive"), ("neu", "Neutral"), ("neg", "Negative"))
for ax, (score_col, title) in zip(axs, panels):
    sns.barplot(data=vader, x="Label", y=score_col, ax=ax)
    ax.set_title(title)

plt.tight_layout()
plt.show()