In [1]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer



# I am using VADER (Valence Aware Dictionary and sEntiment Reasoner), a sentiment analysis tool from the nltk library that is well suited to social media text.

In [2]:
# Fetch the 'vader_lexicon' resource through nltk's downloader; when it is
# already installed, nltk only reports that the package is up-to-date.
# The bare call is kept as the cell's last expression so its True return
# value is still displayed.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Acer\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

# Loading the Dataset

In [3]:
# Location of the tweets CSV.
# NOTE(review): this is an absolute path on one specific machine; anyone else
# running the notebook has to point it at their own copy of Tweets.csv.
TWEETS_CSV_PATH = "C:/Users/Acer/Desktop/Tweets.csv"

# Read the raw tweets into a DataFrame and show the first rows as a sanity check.
df = pd.read_csv(TWEETS_CSV_PATH)
first_rows = df.head()
print(first_rows)

             tweet_id airline_sentiment  airline_sentiment_confidence  \
0  570306133677760513           neutral                        1.0000   
1  570301130888122368          positive                        0.3486   
2  570301083672813571           neutral                        0.6837   
3  570301031407624196          negative                        1.0000   
4  570300817074462722          negative                        1.0000   

  negativereason  negativereason_confidence         airline  \
0            NaN                        NaN  Virgin America   
1            NaN                     0.0000  Virgin America   
2            NaN                        NaN  Virgin America   
3     Bad Flight                     0.7033  Virgin America   
4     Can't Tell                     1.0000  Virgin America   

  airline_sentiment_gold        name negativereason_gold  retweet_count  \
0                    NaN     cairdin                 NaN              0   
1                    NaN    jnar

# Initialize the VADER sentiment intensity analyzer

In [4]:
# Create one VADER sentiment intensity analyzer for the whole notebook;
# analyze_sentiment() reads this module-level `sid` on every call.
sid = SentimentIntensityAnalyzer()

def analyze_sentiment(post):
    """Return VADER's compound polarity score for a single post.

    Parameters
    ----------
    post : str
        Text to score. Missing or non-string values (``None``, the ``NaN``
        pandas produces for an empty CSV cell, numbers, ...) are accepted and
        scored as neutral instead of being passed to the analyzer.

    Returns
    -------
    float
        The ``'compound'`` entry of ``sid.polarity_scores(post)`` for string
        input, or ``0.0`` for anything that is not a string.
    """
    # The analyzer works on text; a non-string value here would otherwise make
    # a single bad row abort the whole ``.apply`` pass. 0.0 is the score the
    # downstream classification labels as 'neutral'.
    if not isinstance(post, str):
        return 0.0
    return sid.polarity_scores(post)['compound']

def _label_for_score(score):
    """Map a compound score to 'positive', 'negative' or 'neutral' by its sign."""
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'


# Score every tweet's text and keep the compound value in its own column.
df['sentiment_score'] = df['text'].apply(analyze_sentiment)

# Turn each score into a label.
# NOTE(review): the cut-off is exactly zero, so any non-zero compound score is
# labelled positive/negative. VADER's own documentation suggests +/-0.05 as
# typical thresholds; confirm that zero is the intended choice.
df['sentiment'] = df['sentiment_score'].apply(_label_for_score)

# Preview the text next to the two derived columns.
preview_columns = ['text', 'sentiment_score', 'sentiment']
print(df[preview_columns].head())



                                                text  sentiment_score  \
0                @VirginAmerica What @dhepburn said.           0.0000   
1  @VirginAmerica plus you've added commercials t...           0.0000   
2  @VirginAmerica I didn't today... Must mean I n...           0.0000   
3  @VirginAmerica it's really aggressive to blast...          -0.5984   
4  @VirginAmerica and it's a really big bad thing...          -0.5829   

  sentiment  
0   neutral  
1   neutral  
2   neutral  
3  negative  
4  negative  


# Counting how many tweets in the dataset were classified as positive, negative, or neutral.

In [5]:
# Number of tweets per sentiment label, most frequent label first.
overall_sentiment = df['sentiment'].value_counts()

# One print call: heading on the first line, the counts underneath.
print("Overall Sentiment Summary:", overall_sentiment, sep="\n")


Overall Sentiment Summary:
sentiment
positive    6219
negative    5155
neutral     3266
Name: count, dtype: int64
