# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
from nltk.corpus import stopwords
from wordcloud import WordCloud
import gensim
from gensim import corpora
from gensim.models import LdaModel
from textblob import TextBlob

# Load the dataset. The 'post_tokens' column is read as raw text here and is
# replaced in place by a list of tokens per row below.
df = pd.read_csv("data.csv")

# Tokenize the text data
nltk.download('punkt')
# NOTE(review): recent NLTK releases may look for the 'punkt_tab' resource
# instead of 'punkt' -- confirm against the installed NLTK version.
# read_csv yields NaN (a float) for empty cells and word_tokenize raises on
# non-string input, so missing posts are coerced to '' (-> empty token list)
# instead of aborting the whole run.
df['post_tokens'] = df['post_tokens'].fillna('').astype(str).apply(word_tokenize)

# Remove stopwords (case-insensitive match against NLTK's English list)
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
df['post_tokens'] = df['post_tokens'].apply(
    lambda tokens: [word for word in tokens if word.lower() not in stop_words]
)

# Sentiment Analysis
def calculate_sentiment(text):
    """Return the TextBlob sentiment polarity for a sequence of tokens.

    The items of ``text`` are joined with single spaces into one string,
    which is wrapped in a ``TextBlob``; the blob's ``sentiment.polarity``
    value is returned.
    """
    joined = ' '.join(text)
    return TextBlob(joined).sentiment.polarity

# Score every post: each row's token list is passed to calculate_sentiment.
# NOTE(review): at this point 'post_tokens' has already had NLTK's English
# stop words removed, and that list includes negations such as "not" and
# "no" -- polarity may be skewed; consider scoring the unfiltered text.
df['sentiment'] = df['post_tokens'].map(calculate_sentiment)

# Plot sentiment distribution (20-bin histogram with a KDE overlay)
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data=df, x='sentiment', bins=20, kde=True, ax=ax)
ax.set_title("Distribution of Sentiment")
ax.set_xlabel("Sentiment Polarity")
ax.set_ylabel("Frequency")
plt.show()

# Word Cloud
# Flatten the per-post token lists into a single list of words.
all_words = []
for tokens in df['post_tokens']:
    all_words.extend(tokens)

# WordCloud.generate takes one string, so the words are space-joined first.
cloud_text = ' '.join(all_words)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(cloud_text)

# Render the cloud as an image with the axes hidden.
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title("Word Cloud")
plt.show()

# Topic Modeling with LDA
# Build the token -> id mapping from the per-post token lists, then convert
# each post to its bag-of-words representation.
dictionary = corpora.Dictionary(df['post_tokens'])
corpus = [dictionary.doc2bow(tokens) for tokens in df['post_tokens']]

# A fixed random_state makes the fitted topics reproducible; without it the
# printed topics change from one run to the next.
lda_model = LdaModel(
    corpus,
    num_topics=5,
    id2word=dictionary,
    passes=15,
    random_state=42,
)

# Print the 5 highest-weighted words of each topic.
topics = lda_model.print_topics(num_words=5)
for topic in topics:
    print(topic)

# Additional steps for visualizing topics can be added using libraries like pyLDAvis

# Distribution of Target Communities and Classifications
# One bar group per 'Target Community' value, split by 'Classification'.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='Target Community', hue='Classification', ax=ax)
ax.set_title("Distribution of Target Communities and Classifications")
ax.set_xlabel("Target Community")
ax.set_ylabel("Frequency")
ax.legend(title="Classification")
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)
plt.show()
