# Twitter Entity Sentiment Analysis
This notebook analyzes and visualizes sentiment patterns in tweets to understand public opinion and attitudes towards specific topics or brands.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer


In [None]:
# Load dataset (replace with your file path if different)
# header=None makes pandas treat the first line of the file as data, so the
# four column labels are attached explicitly right after reading.
column_names = ['ID', 'Entity', 'Sentiment', 'Tweet']
df = pd.read_csv('twitter_training.csv', header=None).set_axis(column_names, axis=1)
df.head()

In [None]:
# English stop words from the NLTK corpus, held in a set so that the per-word
# membership test done while cleaning tweets is a hash lookup.
stop_words = set()
stop_words.update(stopwords.words('english'))

def clean_text(text):
    """Normalise one tweet into a lowercase, stop-word-free string.

    URLs, @mentions, #hashtags and every character that is neither an ASCII
    letter/digit nor whitespace are removed. The remainder is lowercased,
    split on whitespace, filtered against the module-level ``stop_words`` set
    and re-joined with single spaces.

    Args:
        text: The raw tweet. Non-string values are converted with ``str``;
            ``None`` and float NaN (what pandas yields for an empty CSV cell)
            are treated as missing.

    Returns:
        str: The cleaned tweet, or ``''`` when the input is missing or
        nothing survives the filtering.
    """
    # NaN is the only float that is unequal to itself. Without this guard
    # str(nan) would inject the literal token "nan" into the cleaned text.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    # \s (rather than a literal space) keeps newlines and tabs in place so that
    # split() still sees them as word boundaries; deleting them would glue the
    # neighbouring words together.
    stripped = re.sub(r"http\S+|@\w+|#\w+|[^A-Za-z0-9\s]", '', str(text))
    return ' '.join(w for w in stripped.lower().split() if w not in stop_words)

# Clean every tweet once and keep the result in its own column.
df['clean_tweet'] = df['Tweet'].map(clean_text)
# Show raw and cleaned text side by side as a quick sanity check.
df.loc[:, ['Tweet', 'clean_tweet']].head()

In [None]:
# Order the bars from the most to the least frequent sentiment label.
sentiment_order = df['Sentiment'].value_counts().index
ax = sns.countplot(data=df, x='Sentiment', order=sentiment_order)
ax.set_title('Sentiment Distribution')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Rows: entities, columns: sentiment labels, cells: tweet counts
# (entity/label combinations that never occur are filled with 0).
entity_sentiment = (
    df.groupby('Entity')['Sentiment']
    .value_counts()
    .unstack()
    .fillna(0)
)
ax = entity_sentiment.plot.barh(stacked=True, figsize=(10, 8), colormap='coolwarm')
ax.set_title('Sentiment Breakdown by Entity')
ax.set_xlabel('Tweet Count')
ax.set_ylabel('Entity')
ax.legend(title='Sentiment')
plt.tight_layout()
plt.show()

In [None]:
# One word cloud per sentiment label, built from the cleaned tweets.
for sentiment in ['Positive', 'Negative', 'Neutral']:
    text = ' '.join(df.loc[df['Sentiment'] == sentiment, 'clean_tweet'])
    # WordCloud.generate raises ValueError when it is given no words, which
    # would abort the whole cell if a label is absent from the data.
    if not text.strip():
        print(f"No words available for '{sentiment}' tweets; skipping word cloud.")
        continue
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(f"{sentiment} Tweets Word Cloud")
    plt.show()
    # Release the figure explicitly so figures created in the loop do not pile up.
    plt.close(fig)

In [None]:
sia = SentimentIntensityAnalyzer()
# Score the raw tweet text rather than `clean_tweet`. The cleaning step
# lowercases, strips punctuation and drops NLTK stop words, and that stop-word
# list contains negations and intensifiers ("not", "no", "very", ...). VADER's
# rules depend on exactly those cues (plus capitalisation and "!"), so scoring
# the cleaned text can flip or flatten the polarity (e.g. "not good" -> "good").
# Missing tweets are scored as an empty string, which has no sentiment-bearing
# words.
raw_tweets = df['Tweet'].fillna('').astype(str)
df['vader_score'] = raw_tweets.apply(lambda tweet: sia.polarity_scores(tweet)['compound'])

def label(score):
    """Map a compound sentiment score to a categorical label.

    Args:
        score: Numeric compound score.

    Returns:
        str: 'Positive' when score >= 0.05, 'Negative' when score <= -0.05,
        otherwise 'Neutral' (this includes NaN, which fails both comparisons).
    """
    if score >= 0.05:
        return 'Positive'
    if score <= -0.05:
        return 'Negative'
    return 'Neutral'

# Translate each compound score into its categorical label.
df['vader_sentiment'] = df['vader_score'].map(label)
# Preview the cleaned text next to the VADER score and the derived label.
df.loc[:, ['clean_tweet', 'vader_score', 'vader_sentiment']].head()