# Task-04


**Analyze and visualize sentiment patterns in social media data to understand public opinion and attitudes towards specific topics or brands.**

Sample dataset: https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis

In [None]:
import os
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import nltk
from nltk.corpus import stopwords
# Set seaborn's global plotting theme to the "whitegrid" style so the
# figures drawn later in the notebook share it.
sns.set_theme(style="whitegrid")

In [None]:
# Locate the dataset CSV inside the Kaggle input directory.
path = "/kaggle/input/twitter-entity-sentiment-analysis"
available_files = os.listdir(path)  # list once and reuse for both steps
print("Available files:", available_files)
# os.listdir returns entries in arbitrary order, so sort before taking the
# first CSV; otherwise the file that gets loaded can differ between runs
# whenever the directory holds more than one CSV.
csv_files = sorted(fname for fname in available_files if fname.endswith('.csv'))
if not csv_files:
    raise FileNotFoundError("No CSV files found in the specified directory.")
data_file = os.path.join(path, csv_files[0])

In [None]:
# Read the header-less CSV and label its four columns in a single chained
# expression, then preview the first rows.
df = pd.read_csv(data_file, header=None).set_axis(
    ['tweet_id', 'entity', 'sentiment', 'text'],
    axis=1,
)
print(df.head())

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
# Tally how many rows fall under each sentiment label and each entity,
# then report both tables.
sentiment_counts = df['sentiment'].value_counts()
entity_counts = df['entity'].value_counts()
print("Sentiment counts:\n", sentiment_counts)
print("Entity counts:\n", entity_counts)

In [None]:
def preprocess_text(text):
    """Normalise a raw tweet into lowercase, letters-only text.

    URLs, @mentions and #hashtags are removed first, then every character
    that is not an ASCII letter or whitespace is dropped. The result is
    lowercased and stripped of leading/trailing whitespace; internal runs
    of whitespace are left as they are.

    Args:
        text: The raw tweet. Non-string values are converted with ``str``;
            missing values (``None`` or a float NaN) are treated as empty.

    Returns:
        The cleaned string, or ``""`` when the input is missing.
    """
    # A missing value would otherwise be stringified to "none"/"nan" and
    # end up counted as a real word in later frequency analysis.
    # (NaN is the only float that is not equal to itself.)
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = re.sub(r"http\S+|@\w+|#\w+", "", str(text))
    text = re.sub(r"[^A-Za-z\s]", "", text)
    return text.lower().strip()

In [None]:
# Store a cleaned version of every tweet next to the raw text, then show
# the two columns side by side for a quick sanity check.
df['clean_text'] = df['text'].apply(preprocess_text)
print(df.loc[:, ['text', 'clean_text']].head())

In [None]:
# Bar chart of how many tweets carry each sentiment label.
# NOTE(review): newer seaborn releases may warn when `palette` is passed
# without `hue` — confirm against the installed version.
fig, ax = plt.subplots(figsize=(7, 4))
sns.countplot(x='sentiment', data=df, palette='pastel', ax=ax)
ax.set_title('Distribution of Tweet Sentiments')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Tweet Count')
fig.tight_layout()
plt.show()

In [None]:
# Sentiment breakdown restricted to the ten entities with the most rows.
top_entities = df['entity'].value_counts().nlargest(10).index
top_entity_rows = df[df['entity'].isin(top_entities)]

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(
    data=top_entity_rows,
    x='entity',
    hue='sentiment',
    palette='muted',
    ax=ax,
)
ax.set_title('Sentiment Breakdown for Top 10 Entities')
ax.set_xlabel('Entity')
ax.set_ylabel('Tweet Count')
# Entity names are drawn at an angle, as in the original plot.
plt.xticks(rotation=45)
ax.legend(title='Sentiment')
fig.tight_layout()
plt.show()

In [None]:
# Plot the ten most frequent non-stopword tokens for each sentiment label.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    # The stopwords corpus has to be downloaded separately from the nltk
    # package; fetch it on first use instead of failing outright.
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))

# dropna() skips missing labels: they can never match the equality filter
# below and would only produce an empty chart.
for sentiment in df['sentiment'].dropna().unique():
    tweets = df.loc[df['sentiment'] == sentiment, 'clean_text']
    # Tokenise tweet by tweet rather than joining everything into one
    # large intermediate string first.
    words = [word for tweet in tweets for word in tweet.split()]
    filtered_words = [word for word in words if word not in stop_words]
    counts = Counter(filtered_words)
    common = counts.most_common(10)
    if not common:
        # Nothing left after stopword removal, so there is nothing to plot.
        continue
    word_df = pd.DataFrame(common, columns=['word', 'count'])
    plt.figure(figsize=(10, 5))
    sns.barplot(data=word_df, x='word', y='count', palette='pastel')
    plt.title(f"Top 10 Words in {sentiment} Tweets")
    plt.xlabel("Word")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    # Keep the rotated tick labels inside the figure, as the other plots do.
    plt.tight_layout()
    plt.show()