In [1]:
import warnings
# Suppress every warning from this point on: no category, module or message
# filter is passed, so nothing is exempt (including deprecation notices).
warnings.filterwarnings("ignore")

import nltk
import pandas as pd

from nltk.sentiment import SentimentIntensityAnalyzer

# Download NLTK resources

In [2]:
# Fetch the 'vader_lexicon' package through NLTK's downloader.
# Presumably this is the lexicon SentimentIntensityAnalyzer loads — verify
# against the NLTK docs if the resource name ever changes.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

# Initialize sentiment analyzer

In [3]:
# One analyzer instance, reused for every tweet scored later in the notebook.
sia = SentimentIntensityAnalyzer()

# Load dataset

In [4]:
# Location of the input CSV inside the Kaggle input directory.
csv_path = '/kaggle/input/sentiment-analysis-dataset/sentiment_analysis.csv'

# Decode the file as latin-1 while parsing it into a DataFrame.
df = pd.read_csv(csv_path, encoding='latin-1')

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,id,label,tweet
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1...
1,2,0,Finally a transparant silicon case ^^ Thanks t...
2,3,0,We love this! Would you go? #talk #makememorie...
3,4,0,I'm wired I know I'm George I was made that wa...
4,5,1,What amazing service! Apple won't even talk to...


In [5]:
# (rows, columns) of the loaded frame.
df.shape

(7920, 3)

# Perform sentiment analysis on each tweet in the dataset

In [6]:
# Compound polarity score for every tweet, kept in the frame's row order.
sentiment_scores = [
    sia.polarity_scores(tweet)['compound']
    for tweet in df['tweet']
]

# Add sentiment scores to dataframe

In [7]:
# Attach the scores as a new column, aligned explicitly to the frame's index
# so each score lands on the row it was computed from.
df['sentiment_score'] = pd.Series(sentiment_scores, index=df.index)

# Preview the first five rows with the new column.
df.head(5)

Unnamed: 0,id,label,tweet,sentiment_score
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1...,0.0
1,2,0,Finally a transparant silicon case ^^ Thanks t...,0.7096
2,3,0,We love this! Would you go? #talk #makememorie...,0.6696
3,4,0,I'm wired I know I'm George I was made that wa...,0.2263
4,5,1,What amazing service! Apple won't even talk to...,0.508


# Analyze Sentiment Scores

In [8]:
# Partition the tweets by the sign of their compound score.
# NOTE(review): the VADER authors suggest +/-0.05 cut-offs when labelling
# text; this notebook splits strictly at zero — confirm that is intended.
compound = df['sentiment_score']

positive_tweets = df.loc[compound.gt(0)]
negative_tweets = df.loc[compound.lt(0)]
neutral_tweets = df.loc[compound.eq(0)]

# Print the sentiment distribution

In [9]:
# Report how many tweets fall into each sentiment bucket.
print("Sentiment Distribution:")
for label, bucket in (
    ("Positive Tweets:", positive_tweets),
    ("Negative Tweets:", negative_tweets),
    ("Neutral Tweets:", neutral_tweets),
):
    print(label, len(bucket))

Sentiment Distribution:
Positive Tweets: 3180
Negative Tweets: 1363
Neutral Tweets: 3377


# Save the updated dataset to CSV

In [10]:
# Write the scored frame to a CSV in the working directory; the row index is
# left out of the file.
results_path = 'sentiment_analysis_results.csv'
df.to_csv(results_path, index=False)